diff --git a/application/backend/app/execution/training/otx_trainer.py b/application/backend/app/execution/training/otx_trainer.py
index 384c80aac91..07621fbe011 100644
--- a/application/backend/app/execution/training/otx_trainer.py
+++ b/application/backend/app/execution/training/otx_trainer.py
@@ -24,8 +24,8 @@
     OTXMultilabelClsDataset,
 )
 from otx.data.dataset.base import OTXDataset
+from otx.data.factory import TransformLibFactory
 from otx.data.module import OTXDataModule
-from otx.data.transform_libs.torchvision import TorchVisionTransformLib
 from otx.metrics import MetricCallable
 from otx.metrics.accuracy import MultiClassClsMetricCallable, MultiLabelClsMetricCallable
 from otx.metrics.mean_ap import MaskRLEMeanAPCallable, MeanAPCallable
@@ -229,9 +229,8 @@ def build_subset_config(subset_name: str) -> SubsetConfig:
             subset_cfg_data["input_size"] = training_config["data"]["input_size"]
             sampler_cfg_data = subset_cfg_data.pop("sampler", {})
             subset_config = SubsetConfig(sampler=SamplerConfig(**sampler_cfg_data), **subset_cfg_data)
-            subset_config.transforms = TorchVisionTransformLib.generate(  # pyrefly: ignore[bad-assignment]
-                subset_config
-            )
+            # pyrefly: ignore[missing-attribute,bad-assignment]
+            subset_config.transforms = TransformLibFactory.generate(subset_config)
             return subset_config
 
         with self._db_session_factory() as db:
@@ -291,15 +290,15 @@ def build_subset_config(subset_name: str) -> SubsetConfig:
             logger.info("Preparing {} instances for each subset", otx_dataset_class.__name__)
             otx_training_dataset = otx_dataset_class(
                 dm_subset=dm_training_dataset,
-                transforms=train_subset_config.transforms,  # pyrefly: ignore[bad-argument-type]
+                transforms=train_subset_config.transforms,  # pyrefly: ignore[missing-attribute,bad-argument-type]
             )
             otx_validation_dataset = otx_dataset_class(
                 dm_subset=dm_validation_dataset,
-                transforms=val_subset_config.transforms,  # pyrefly: ignore[bad-argument-type]
+                transforms=val_subset_config.transforms,  # pyrefly: ignore[missing-attribute,bad-argument-type]
             )
             otx_testing_dataset = otx_dataset_class(
                 dm_subset=dm_testing_dataset,
-                transforms=test_subset_config.transforms,  # pyrefly: ignore[bad-argument-type]
+                transforms=test_subset_config.transforms,  # pyrefly: ignore[missing-attribute,bad-argument-type]
             )
 
             return DatasetInfo(
@@ -368,8 +367,8 @@ def train_model(
         model_cfg["init_args"]["label_info"] = otx_datamodule.label_info.label_names
         model_cfg["init_args"]["data_input_params"] = DataInputParams(
             input_size=cast(tuple[int, int], otx_datamodule.input_size),
-            mean=otx_datamodule.input_mean,
-            std=otx_datamodule.input_std,
+            mean=otx_datamodule.input_mean if otx_datamodule.input_mean is not None else (0.0, 0.0, 0.0),
+            std=otx_datamodule.input_std if otx_datamodule.input_std is not None else (1.0, 1.0, 1.0),
         ).as_dict()
         model_parser = ArgumentParser()
         model_parser.add_subclass_arguments(OTXModel, "model", required=False, fail_untyped=False)
diff --git a/application/backend/tests/unit/execution/training/test_otx_trainer.py b/application/backend/tests/unit/execution/training/test_otx_trainer.py
index 1815efb6f03..8183b0ed317 100644
--- a/application/backend/tests/unit/execution/training/test_otx_trainer.py
+++ b/application/backend/tests/unit/execution/training/test_otx_trainer.py
@@ -402,7 +402,7 @@ def test_prepare_training_dataset_success(
                     "batch_size": 8,
                     "num_workers": 4,
                     "sampler": {"class_path": "torch.utils.data.RandomSampler"},
-                    "transforms": [
+                    "augmentations_cpu": [
                         {"class_path": "torchvision.transforms.v2.RandomHorizontalFlip", "init_args": {"p": 0.5}}
                     ],
                 },
@@ -410,23 +410,23 @@ def test_prepare_training_dataset_success(
                     "batch_size": 4,
                     "num_workers": 2,
                     "sampler": {"class_path": "torch.utils.data.RandomSampler"},
-                    "transforms": [],
+                    "augmentations_cpu": [],
                 },
                 "test_subset": {
                     "batch_size": 2,
                     "num_workers": 1,
                     "sampler": {"class_path": "torch.utils.data.RandomSampler"},
-                    "transforms": [],
+                    "augmentations_cpu": [],
                 },
             }
         }
 
-        # Mock TorchVisionTransformLib.generate to return mock transforms
+        # Mock TransformLibFactory.generate to return mock transforms
         mock_train_transforms = [Mock()]
         mock_val_transforms = [Mock()]
         mock_test_transforms = [Mock()]
 
-        with patch("app.execution.training.otx_trainer.TorchVisionTransformLib.generate") as mock_generate:
+        with patch("app.execution.training.otx_trainer.TransformLibFactory.generate") as mock_generate:
             mock_generate.side_effect = [mock_train_transforms, mock_val_transforms, mock_test_transforms]
 
             # Mock the __get_otx_dataset_class_by_task_type method to return a proper mock class
@@ -520,14 +520,17 @@ def test_prepare_training_dataset_success(
         # Verify SubsetConfig objects were created correctly
         assert dataset_info.otx_training_subset_config.batch_size == 8
         assert dataset_info.otx_training_subset_config.num_workers == 4
+        # pyrefly: ignore[missing-attribute]
         assert dataset_info.otx_training_subset_config.transforms == mock_train_transforms
 
         assert dataset_info.otx_validation_subset_config.batch_size == 4
         assert dataset_info.otx_validation_subset_config.num_workers == 2
+        # pyrefly: ignore[missing-attribute]
         assert dataset_info.otx_validation_subset_config.transforms == mock_val_transforms
 
         assert dataset_info.otx_testing_subset_config.batch_size == 2
         assert dataset_info.otx_testing_subset_config.num_workers == 1
+        # pyrefly: ignore[missing-attribute]
         assert dataset_info.otx_testing_subset_config.transforms == mock_test_transforms
 
 
diff --git a/application/backend/uv.lock b/application/backend/uv.lock
index 17eb61e2a54..64da9e9b3ff 100644
--- a/application/backend/uv.lock
+++ b/application/backend/uv.lock
@@ -492,7 +492,7 @@ wheels = [
 [[package]]
 name = "datumaro"
 version = "2.0.0"
-source = { git = "https://github.com/open-edge-platform/datumaro.git?rev=develop#af58f5cfdfe0584036287584fd38216e390eb292" }
+source = { git = "https://github.com/open-edge-platform/datumaro.git?rev=develop#86aa54056927048a5424956379cb93d2680e71b9" }
 dependencies = [
     { name = "attrs" },
     { name = "cachetools" },
@@ -1352,6 +1352,41 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2a/8f/8f6f491d595a9e5912971f3f863d81baddccc8a4d0c3749d6a0dd9ffc9df/kiwisolver-1.4.9-cp313-cp313t-win_arm64.whl", hash = "sha256:0749fd8f4218ad2e851e11cc4dc05c7cbc0cbc4267bdfdb31782e65aace4ee9c", size = 68646, upload-time = "2025-08-10T21:27:00.52Z" },
 ]
 
+[[package]]
+name = "kornia"
+version = "0.8.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "kornia-rs", marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
+    { name = "packaging", marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
+    { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform != 'linux' and sys_platform != 'win32' and extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (sys_platform == 'linux' and extra != 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (sys_platform == 'linux' and extra != 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (sys_platform == 'win32' and extra != 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (sys_platform == 'win32' and extra != 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra != 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra != 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra != 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
+    { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(sys_platform == 'linux' and extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (sys_platform == 'win32' and extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
+    { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra != 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra != 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-3-otx-cuda' and extra != 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra != 'extra-3-otx-cuda' and extra != 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu') or (extra != 'extra-3-otx-cpu' and extra != 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra != 'extra-3-otx-cpu' and extra != 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra != 'extra-3-otx-cpu' and extra != 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
+    { name = "torch", version = "2.10.0+xpu", source = { registry = "https://download.pytorch.org/whl/xpu" }, marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c6/e6/45e757d4924176e4d4e111e10effaab7db382313243e0188a06805010073/kornia-0.8.2.tar.gz", hash = "sha256:5411b2ce0dd909d1608016308cd68faeef90f88c47f47e8ecd40553fd4d8b937", size = 667151, upload-time = "2025-11-08T12:10:03.042Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/79/d4/e9bd12b7b4cbd23b4dfb47e744ee1fa54d6d9c3c9bc406ec86c1be8c8307/kornia-0.8.2-py2.py3-none-any.whl", hash = "sha256:32dfe77c9c74a87a2de49395aa3c2c376a1b63c27611a298b394d02d13905819", size = 1095012, upload-time = "2025-11-08T12:10:01.226Z" },
+]
+
+[[package]]
+name = "kornia-rs"
+version = "0.1.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ab/17/8b3518ece01512a575b18f86b346879793d3dea264b314796bbd44d42e11/kornia_rs-0.1.10.tar.gz", hash = "sha256:5fd3fbc65240fa751975f5870b079f98e7fdcaa2885ea577b3da324d8bf01d81", size = 145610, upload-time = "2025-11-08T11:29:32.399Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/90/01/1d658b11635431f8c31f416c90ca99befdc1f4fdd20e91a05b480b9c0ea8/kornia_rs-0.1.10-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:950a943f91c2cff94d80282886b0d48bbc15ef4a7cc4b15ac819724dfdb2f414", size = 2811810, upload-time = "2025-11-08T11:30:22.497Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/ed/bd970ded1d819557cc33055d982b1847eb385151ea5b0c915c16ed74f5c0/kornia_rs-0.1.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:63b802aaf95590276d3426edc6d23ff11caf269d2bc2ec37cb6c679b7b2a8ee0", size = 2076195, upload-time = "2025-11-08T11:30:08.726Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/10/afd700455105fdba5b043d724f3a65ca36259b89c736a3b71d5a03103808/kornia_rs-0.1.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38087da7cdf2bffe10530c0d53335dd1fc107fae6521f2dd4797c6522b6d11b3", size = 2205781, upload-time = "2025-11-08T11:29:36.8Z" },
+    { url = "https://files.pythonhosted.org/packages/25/16/ec8dc3ce1d79660ddd6a186a77037e0c3bf61648e6c72250280b648fb291/kornia_rs-0.1.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa3464de8f9920d87415721c36840ceea23e054dcb54dd9f69189ba9eabce0c7", size = 3042272, upload-time = "2025-11-08T11:29:52.936Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/75/62785aba777d35a562a97a987d65840306fab7a8ecd2d928dd8ac779e29b/kornia_rs-0.1.10-cp313-cp313-win_amd64.whl", hash = "sha256:c57d157bebe64c22e2e44c72455b1c7365eee4d767e0c187dc28f22d072ebaf7", size = 2539802, upload-time = "2025-11-08T11:30:35.753Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/d5/32b23d110109eb77b2dc952be75411f7e495da9105058e2cb08924a9cc90/kornia_rs-0.1.10-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:0b375f02422ef5986caed612799b4ddcc91f57f303906868b0a8c397a17e7607", size = 2810244, upload-time = "2025-11-08T11:30:23.637Z" },
+    { url = "https://files.pythonhosted.org/packages/96/5f/5ecde42b7c18e7df26c413848a98744427c3d370f5eed725b65f0bc356fb/kornia_rs-0.1.10-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f2bcfa438d6b5dbe07d573afc980f2871f6639b2eac5148b8c0bba4f82357b9a", size = 2074220, upload-time = "2025-11-08T11:30:09.972Z" },
+    { url = "https://files.pythonhosted.org/packages/18/6c/6fc86eb855bcc723924c3b91de98dc6c0f381987ce582e080b8eade3bc88/kornia_rs-0.1.10-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:021b0a02b2356b12b3954a298f369ed4fe2dd522dcf8b6d72f91bf3bd8eea201", size = 2204672, upload-time = "2025-11-08T11:29:38.777Z" },
+    { url = "https://files.pythonhosted.org/packages/19/26/3ac706d1b36761c0f7a36934327079adcb42d761c8c219865123d49fc1b2/kornia_rs-0.1.10-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d9b07e2ae79e423b3248d94afd092e324c5ddfe3157fafc047531cc8bffa6a3", size = 3042797, upload-time = "2025-11-08T11:29:54.719Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/f4/d62728d86bc67f5516249b154ff0bdfcf38a854dae284ff0ce62da87af99/kornia_rs-0.1.10-cp313-cp313t-win_amd64.whl", hash = "sha256:b80a037e34d63cb021bcd5fc571e41aff804a2981311f66e883768c6b8e5f8de", size = 2543855, upload-time = "2025-11-08T11:30:37.437Z" },
+]
+
 [[package]]
 name = "lightning"
 version = "2.6.0"
@@ -2179,6 +2214,7 @@ dependencies = [
     { name = "ftfy", marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
     { name = "importlib-resources", marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
     { name = "jsonargparse", marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
+    { name = "kornia", marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
     { name = "lightning", marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
     { name = "nncf", marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
     { name = "numpy", marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
@@ -2231,6 +2267,7 @@ requires-dist = [
     { name = "furo", marker = "extra == 'docs'" },
     { name = "importlib-resources", specifier = "==6.5.2" },
     { name = "jsonargparse", specifier = "==4.35.0" },
+    { name = "kornia", specifier = "~=0.8.2" },
     { name = "lightning", specifier = "==2.6.0" },
     { name = "linkify-it-py", marker = "extra == 'docs'" },
     { name = "myst-parser", marker = "extra == 'docs'", specifier = ">=2.0.0" },
@@ -3943,12 +3980,12 @@ dependencies = [
     { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(sys_platform == 'linux' and extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (sys_platform == 'win32' and extra == 'extra-3-otx-cpu' and extra == 'extra-9-geti-tune-cpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:fe54cbd5942cd0b26a90f1748f0d4421caf67be35c281c6c3b8573733a03d630" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:90eec299e1f82cfaf080ccb789df3838cb9a54b57e2ebe33852cd392c692de5c" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:783c8fc580bbfc159bff52f4f72cdd538e42b32956e70dffa42b940db114e151" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:e985e12a9a232618e5a43476de5689e4b14989f5da6b93909c57afa57ec27012" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:813f0106eb3e268f3783da67b882458e544c6fb72f946e6ca64b5ed4e62c6a77" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:9212210f417888e6261c040495180f053084812cf873dedba9fc51ff4b24b2d3" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:fe54cbd5942cd0b26a90f1748f0d4421caf67be35c281c6c3b8573733a03d630" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:90eec299e1f82cfaf080ccb789df3838cb9a54b57e2ebe33852cd392c692de5c" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:783c8fc580bbfc159bff52f4f72cdd538e42b32956e70dffa42b940db114e151" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:e985e12a9a232618e5a43476de5689e4b14989f5da6b93909c57afa57ec27012" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:813f0106eb3e268f3783da67b882458e544c6fb72f946e6ca64b5ed4e62c6a77" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:9212210f417888e6261c040495180f053084812cf873dedba9fc51ff4b24b2d3" },
 ]
 
 [[package]]
@@ -3961,12 +3998,12 @@ dependencies = [
     { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra != 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra != 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-3-otx-cuda' and extra != 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra != 'extra-3-otx-cuda' and extra != 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu') or (extra != 'extra-3-otx-cpu' and extra != 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra != 'extra-3-otx-cpu' and extra != 'extra-3-otx-cuda' and extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra != 'extra-3-otx-cpu' and extra != 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:12c253520a26483fe3c614f63ff16eca6d9b0b4ebe510699b7d15d88e6c0cd35" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:a9c0de893dce9c2913c9c7ae88a916910f92d02b99da149678806d18e8079f29" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:e2e0317e3861bba1b5aeba7c1cb4bcd50937cf0bffdbea478619d1f5f73e9050" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:58b2971b55c761f1d2491bd80fcc4618ea97d363d387a9dd3aff23220cbee264" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:1b6878b043513ea3dea1b90bfb5193455d9b248b8c4d5e66ea9f5d1643a43f13" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:96cd2ba7b289117873b2a8f4c80605d38118d920b1045f3ce21a9f0ca68a701e" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:12c253520a26483fe3c614f63ff16eca6d9b0b4ebe510699b7d15d88e6c0cd35" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:a9c0de893dce9c2913c9c7ae88a916910f92d02b99da149678806d18e8079f29" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:e2e0317e3861bba1b5aeba7c1cb4bcd50937cf0bffdbea478619d1f5f73e9050" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:58b2971b55c761f1d2491bd80fcc4618ea97d363d387a9dd3aff23220cbee264" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:1b6878b043513ea3dea1b90bfb5193455d9b248b8c4d5e66ea9f5d1643a43f13" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:96cd2ba7b289117873b2a8f4c80605d38118d920b1045f3ce21a9f0ca68a701e" },
 ]
 
 [[package]]
@@ -3979,10 +4016,10 @@ dependencies = [
     { name = "torch", version = "2.10.0+xpu", source = { registry = "https://download.pytorch.org/whl/xpu" }, marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-xpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-cuda') or (extra == 'extra-9-geti-tune-cpu' and extra == 'extra-9-geti-tune-xpu') or (extra == 'extra-9-geti-tune-cuda' and extra == 'extra-9-geti-tune-xpu')" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:80269f37865fcd8b57f20e4786efae2200bfa2b2727926c3c7acc82f0e7d3548" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313-win_amd64.whl", hash = "sha256:1c4b44b36a557f7381e3076fb8843366742238648441d607c8d049c6da0f8886" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:216ad249333993ed128368f996210cc9ceae3b4d15709b25aadba844d6c6e8b7" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313t-win_amd64.whl", hash = "sha256:7a04beba6859b76e9e010f2f0eccf13ce70ff5942944a552e83844c166051515" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:80269f37865fcd8b57f20e4786efae2200bfa2b2727926c3c7acc82f0e7d3548" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313-win_amd64.whl", hash = "sha256:1c4b44b36a557f7381e3076fb8843366742238648441d607c8d049c6da0f8886" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:216ad249333993ed128368f996210cc9ceae3b4d15709b25aadba844d6c6e8b7" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313t-win_amd64.whl", hash = "sha256:7a04beba6859b76e9e010f2f0eccf13ce70ff5942944a552e83844c166051515" },
 ]
 
 [[package]]
diff --git a/library/pyproject.toml b/library/pyproject.toml
index e03315b70bd..b4a22c8aff4 100644
--- a/library/pyproject.toml
+++ b/library/pyproject.toml
@@ -51,6 +51,7 @@ dependencies = [
     "pyyaml==6.0.3",
     "tensorboard==2.20.0",
     "tensorboardx==2.6.4",
+    "kornia~=0.8.2",
     # NOTE: rf-detr is consumed from a specific commit that includes export fixes
     # for torch>=2.10 and dynamo export support. Once the upstream roboflow/rf-detr
     # repository publishes a tagged release with these changes, migrate to the
diff --git a/library/pyrefly-baseline.json b/library/pyrefly-baseline.json
index ef253e86a55..82826ddb9c8 100644
--- a/library/pyrefly-baseline.json
+++ b/library/pyrefly-baseline.json
@@ -157,9 +157,9 @@
       "severity": "error"
     },
     {
-      "line": 297,
+      "line": 303,
       "column": 27,
-      "stop_line": 297,
+      "stop_line": 303,
       "stop_column": 75,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -169,9 +169,9 @@
       "severity": "error"
     },
     {
-      "line": 479,
+      "line": 485,
       "column": 67,
-      "stop_line": 479,
+      "stop_line": 485,
       "stop_column": 81,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -181,9 +181,9 @@
       "severity": "error"
     },
     {
-      "line": 481,
+      "line": 487,
       "column": 16,
-      "stop_line": 481,
+      "stop_line": 487,
       "stop_column": 30,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -193,9 +193,9 @@
       "severity": "error"
     },
     {
-      "line": 648,
+      "line": 654,
       "column": 39,
-      "stop_line": 648,
+      "stop_line": 654,
       "stop_column": 48,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -205,9 +205,9 @@
       "severity": "error"
     },
     {
-      "line": 655,
+      "line": 661,
       "column": 30,
-      "stop_line": 655,
+      "stop_line": 661,
       "stop_column": 53,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -217,9 +217,9 @@
       "severity": "error"
     },
     {
-      "line": 655,
+      "line": 661,
       "column": 66,
-      "stop_line": 655,
+      "stop_line": 661,
       "stop_column": 80,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -229,9 +229,9 @@
       "severity": "error"
     },
     {
-      "line": 887,
+      "line": 893,
       "column": 13,
-      "stop_line": 887,
+      "stop_line": 893,
       "stop_column": 31,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -241,9 +241,9 @@
       "severity": "error"
     },
     {
-      "line": 932,
+      "line": 943,
       "column": 36,
-      "stop_line": 932,
+      "stop_line": 943,
       "stop_column": 49,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -253,9 +253,9 @@
       "severity": "error"
     },
     {
-      "line": 934,
+      "line": 945,
       "column": 30,
-      "stop_line": 934,
+      "stop_line": 945,
       "stop_column": 43,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -265,9 +265,9 @@
       "severity": "error"
     },
     {
-      "line": 941,
+      "line": 952,
       "column": 16,
-      "stop_line": 941,
+      "stop_line": 952,
       "stop_column": 22,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -277,9 +277,9 @@
       "severity": "error"
     },
     {
-      "line": 950,
+      "line": 961,
       "column": 27,
-      "stop_line": 950,
+      "stop_line": 961,
       "stop_column": 35,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -289,9 +289,9 @@
       "severity": "error"
     },
     {
-      "line": 953,
+      "line": 964,
       "column": 29,
-      "stop_line": 953,
+      "stop_line": 964,
       "stop_column": 44,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -301,9 +301,9 @@
       "severity": "error"
     },
     {
-      "line": 955,
+      "line": 966,
       "column": 29,
-      "stop_line": 955,
+      "stop_line": 966,
       "stop_column": 45,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -313,9 +313,9 @@
       "severity": "error"
     },
     {
-      "line": 957,
+      "line": 968,
       "column": 29,
-      "stop_line": 957,
+      "stop_line": 968,
       "stop_column": 43,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -325,21 +325,21 @@
       "severity": "error"
     },
     {
-      "line": 959,
+      "line": 970,
       "column": 29,
-      "stop_line": 959,
-      "stop_column": 48,
+      "stop_line": 970,
+      "stop_column": 54,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `type[LearningRateMonitor]` is not assignable to parameter with type `Callback`",
-      "concise_description": "Argument `type[LearningRateMonitor]` is not assignable to parameter with type `Callback`",
+      "description": "Argument `type[SimpleLearningRateMonitor]` is not assignable to parameter with type `Callback`",
+      "concise_description": "Argument `type[SimpleLearningRateMonitor]` is not assignable to parameter with type `Callback`",
       "severity": "error"
     },
     {
-      "line": 961,
+      "line": 972,
       "column": 29,
-      "stop_line": 961,
+      "stop_line": 972,
       "stop_column": 44,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -349,9 +349,9 @@
       "severity": "error"
     },
     {
-      "line": 964,
+      "line": 975,
       "column": 29,
-      "stop_line": 964,
+      "stop_line": 975,
       "stop_column": 42,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -361,9 +361,9 @@
       "severity": "error"
     },
     {
-      "line": 971,
+      "line": 982,
       "column": 29,
-      "stop_line": 971,
+      "stop_line": 982,
       "stop_column": 52,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -373,9 +373,9 @@
       "severity": "error"
     },
     {
-      "line": 980,
+      "line": 991,
       "column": 29,
-      "stop_line": 980,
+      "stop_line": 991,
       "stop_column": 42,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -385,9 +385,21 @@
       "severity": "error"
     },
     {
-      "line": 1058,
+      "line": 995,
+      "column": 29,
+      "stop_line": 995,
+      "stop_column": 52,
+      "path": "src/otx/backend/native/engine.py",
+      "code": -2,
+      "name": "bad-argument-type",
+      "description": "Argument `type[GPUAugmentationCallback]` is not assignable to parameter with type `Callback`",
+      "concise_description": "Argument `type[GPUAugmentationCallback]` is not assignable to parameter with type `Callback`",
+      "severity": "error"
+    },
+    {
+      "line": 1104,
       "column": 25,
-      "stop_line": 1058,
+      "stop_line": 1104,
       "stop_column": 48,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -397,9 +409,9 @@
       "severity": "error"
     },
     {
-      "line": 1073,
+      "line": 1119,
       "column": 45,
-      "stop_line": 1073,
+      "stop_line": 1119,
       "stop_column": 60,
       "path": "src/otx/backend/native/engine.py",
       "code": -2,
@@ -421,9 +433,9 @@
       "severity": "error"
     },
     {
-      "line": 52,
+      "line": 64,
       "column": 9,
-      "stop_line": 52,
+      "stop_line": 64,
       "stop_column": 25,
       "path": "src/otx/backend/native/lightning/accelerators/xpu.py",
       "code": -2,
@@ -457,9 +469,9 @@
       "severity": "error"
     },
     {
-      "line": 92,
+      "line": 96,
       "column": 5,
-      "stop_line": 92,
+      "stop_line": 96,
       "stop_column": 23,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -469,9 +481,9 @@
       "severity": "error"
     },
     {
-      "line": 167,
+      "line": 168,
       "column": 54,
-      "stop_line": 167,
+      "stop_line": 168,
       "stop_column": 64,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -481,9 +493,9 @@
       "severity": "error"
     },
     {
-      "line": 181,
+      "line": 173,
       "column": 51,
-      "stop_line": 181,
+      "stop_line": 173,
       "stop_column": 60,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -493,9 +505,9 @@
       "severity": "error"
     },
     {
-      "line": 197,
+      "line": 188,
       "column": 9,
-      "stop_line": 197,
+      "stop_line": 188,
       "stop_column": 22,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -505,9 +517,9 @@
       "severity": "error"
     },
     {
-      "line": 231,
+      "line": 222,
       "column": 20,
-      "stop_line": 231,
+      "stop_line": 222,
       "stop_column": 36,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -517,9 +529,9 @@
       "severity": "error"
     },
     {
-      "line": 235,
+      "line": 226,
       "column": 9,
-      "stop_line": 235,
+      "stop_line": 226,
       "stop_column": 24,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -529,9 +541,9 @@
       "severity": "error"
     },
     {
-      "line": 258,
+      "line": 249,
       "column": 69,
-      "stop_line": 258,
+      "stop_line": 249,
       "stop_column": 74,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -541,9 +553,9 @@
       "severity": "error"
     },
     {
-      "line": 262,
+      "line": 253,
       "column": 20,
-      "stop_line": 262,
+      "stop_line": 253,
       "stop_column": 25,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -553,9 +565,9 @@
       "severity": "error"
     },
     {
-      "line": 267,
+      "line": 258,
       "column": 20,
-      "stop_line": 267,
+      "stop_line": 258,
       "stop_column": 25,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -565,9 +577,9 @@
       "severity": "error"
     },
     {
-      "line": 271,
+      "line": 262,
       "column": 9,
-      "stop_line": 271,
+      "stop_line": 262,
       "stop_column": 18,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -577,9 +589,9 @@
       "severity": "error"
     },
     {
-      "line": 289,
+      "line": 280,
       "column": 69,
-      "stop_line": 289,
+      "stop_line": 280,
       "stop_column": 74,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -589,9 +601,9 @@
       "severity": "error"
     },
     {
-      "line": 293,
+      "line": 284,
       "column": 20,
-      "stop_line": 293,
+      "stop_line": 284,
       "stop_column": 25,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -601,9 +613,9 @@
       "severity": "error"
     },
     {
-      "line": 298,
+      "line": 289,
       "column": 20,
-      "stop_line": 298,
+      "stop_line": 289,
       "stop_column": 25,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -613,9 +625,9 @@
       "severity": "error"
     },
     {
-      "line": 302,
+      "line": 293,
       "column": 9,
-      "stop_line": 302,
+      "stop_line": 293,
       "stop_column": 21,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -625,9 +637,9 @@
       "severity": "error"
     },
     {
-      "line": 310,
+      "line": 301,
       "column": 48,
-      "stop_line": 310,
+      "stop_line": 301,
       "stop_column": 53,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -637,9 +649,9 @@
       "severity": "error"
     },
     {
-      "line": 312,
+      "line": 303,
       "column": 39,
-      "stop_line": 312,
+      "stop_line": 303,
       "stop_column": 44,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -649,9 +661,9 @@
       "severity": "error"
     },
     {
-      "line": 317,
+      "line": 308,
       "column": 16,
-      "stop_line": 317,
+      "stop_line": 308,
       "stop_column": 23,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -661,9 +673,9 @@
       "severity": "error"
     },
     {
-      "line": 337,
+      "line": 328,
       "column": 27,
-      "stop_line": 337,
+      "stop_line": 328,
       "stop_column": 38,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -673,9 +685,9 @@
       "severity": "error"
     },
     {
-      "line": 341,
+      "line": 332,
       "column": 27,
-      "stop_line": 341,
+      "stop_line": 332,
       "stop_column": 38,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -685,9 +697,9 @@
       "severity": "error"
     },
     {
-      "line": 359,
+      "line": 350,
       "column": 26,
-      "stop_line": 359,
+      "stop_line": 350,
       "stop_column": 51,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -697,21 +709,21 @@
       "severity": "error"
     },
     {
-      "line": 395,
-      "column": 16,
-      "stop_line": 395,
-      "stop_column": 49,
+      "line": 359,
+      "column": 9,
+      "stop_line": 359,
+      "stop_column": 29,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `tuple[list[Optimizer], list[dict[str, Unknown]]]` is not assignable to declared return type `Optimizer | Sequence[Optimizer] | Sequence[OptimizerLRSchedulerConfig] | OptimizerLRSchedulerConfig | tuple[Sequence[Optimizer], Sequence[LRScheduler | LRSchedulerConfig | ReduceLROnPlateau]] | None`",
-      "concise_description": "Returned type `tuple[list[Optimizer], list[dict[str, Unknown]]]` is not assignable to declared return type `Optimizer | Sequence[Optimizer] | Sequence[OptimizerLRSchedulerConfig] | OptimizerLRSchedulerConfig | tuple[Sequence[Optimizer], Sequence[LRScheduler | LRSchedulerConfig | ReduceLROnPlateau]] | None`",
+      "name": "bad-override",
+      "description": "Class member `OTXModel.configure_optimizers` overrides parent class `LightningModule` in an inconsistent manner\n  `OTXModel.configure_optimizers` has type `BoundMethod[OTXModel, (self: OTXModel) -> tuple[list[Optimizer], list[dict[str, Any]]]]`, which is not assignable to `BoundMethod[OTXModel, (self: OTXModel) -> OptimizerLRScheduler]`, the type of `LightningModule.configure_optimizers`",
+      "concise_description": "Class member `OTXModel.configure_optimizers` overrides parent class `LightningModule` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 515,
+      "line": 506,
       "column": 9,
-      "stop_line": 515,
+      "stop_line": 506,
       "stop_column": 24,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -721,9 +733,9 @@
       "severity": "error"
     },
     {
-      "line": 610,
+      "line": 601,
       "column": 9,
-      "stop_line": 610,
+      "stop_line": 601,
       "stop_column": 16,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -733,9 +745,9 @@
       "severity": "error"
     },
     {
-      "line": 617,
+      "line": 608,
       "column": 20,
-      "stop_line": 617,
+      "stop_line": 608,
       "stop_column": 52,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -745,9 +757,9 @@
       "severity": "error"
     },
     {
-      "line": 659,
+      "line": 650,
       "column": 9,
-      "stop_line": 659,
+      "stop_line": 650,
       "stop_column": 42,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -757,9 +769,9 @@
       "severity": "error"
     },
     {
-      "line": 861,
+      "line": 830,
       "column": 9,
-      "stop_line": 861,
+      "stop_line": 830,
       "stop_column": 26,
       "path": "src/otx/backend/native/models/base.py",
       "code": -2,
@@ -1508,7 +1520,7 @@
       "path": "src/otx/backend/native/models/classification/classifier/h_label_classifier.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`>` is not supported between `Module` and `Literal[0]`\n  Expected `__gt__` to be a callable, got `Module | Tensor`",
+      "description": "`>` is not supported between `Module` and `Literal[0]`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__lt__`",
       "concise_description": "`>` is not supported between `Module` and `Literal[0]`",
       "severity": "error"
     },
@@ -1556,7 +1568,7 @@
       "path": "src/otx/backend/native/models/classification/classifier/h_label_classifier.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`>` is not supported between `Module` and `Literal[0]`\n  Expected `__gt__` to be a callable, got `Module | Tensor`",
+      "description": "`>` is not supported between `Module` and `Literal[0]`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__lt__`",
       "concise_description": "`>` is not supported between `Module` and `Literal[0]`",
       "severity": "error"
     },
@@ -1628,7 +1640,7 @@
       "path": "src/otx/backend/native/models/classification/classifier/h_label_classifier.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`>` is not supported between `Module` and `Literal[0]`\n  Expected `__gt__` to be a callable, got `Module | Tensor`",
+      "description": "`>` is not supported between `Module` and `Literal[0]`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__lt__`",
       "concise_description": "`>` is not supported between `Module` and `Literal[0]`",
       "severity": "error"
     },
@@ -1849,9 +1861,9 @@
       "severity": "error"
     },
     {
-      "line": 219,
+      "line": 218,
       "column": 32,
-      "stop_line": 219,
+      "stop_line": 218,
       "stop_column": 78,
       "path": "src/otx/backend/native/models/classification/hlabel_models/base.py",
       "code": -2,
@@ -1861,9 +1873,9 @@
       "severity": "error"
     },
     {
-      "line": 220,
+      "line": 219,
       "column": 32,
-      "stop_line": 220,
+      "stop_line": 219,
       "stop_column": 78,
       "path": "src/otx/backend/native/models/classification/hlabel_models/base.py",
       "code": -2,
@@ -1873,9 +1885,9 @@
       "severity": "error"
     },
     {
-      "line": 248,
+      "line": 250,
       "column": 53,
-      "stop_line": 248,
+      "stop_line": 250,
       "stop_column": 59,
       "path": "src/otx/backend/native/models/classification/hlabel_models/base.py",
       "code": -2,
@@ -1885,9 +1897,9 @@
       "severity": "error"
     },
     {
-      "line": 263,
+      "line": 265,
       "column": 9,
-      "stop_line": 263,
+      "stop_line": 265,
       "stop_column": 28,
       "path": "src/otx/backend/native/models/classification/hlabel_models/base.py",
       "code": -2,
@@ -1897,9 +1909,9 @@
       "severity": "error"
     },
     {
-      "line": 132,
+      "line": 135,
       "column": 32,
-      "stop_line": 132,
+      "stop_line": 135,
       "stop_column": 78,
       "path": "src/otx/backend/native/models/classification/hlabel_models/mobilenet_v3.py",
       "code": -2,
@@ -1909,9 +1921,9 @@
       "severity": "error"
     },
     {
-      "line": 133,
+      "line": 136,
       "column": 32,
-      "stop_line": 133,
+      "stop_line": 136,
       "stop_column": 78,
       "path": "src/otx/backend/native/models/classification/hlabel_models/mobilenet_v3.py",
       "code": -2,
@@ -1921,9 +1933,9 @@
       "severity": "error"
     },
     {
-      "line": 131,
+      "line": 134,
       "column": 72,
-      "stop_line": 131,
+      "stop_line": 134,
       "stop_column": 82,
       "path": "src/otx/backend/native/models/classification/hlabel_models/vit.py",
       "code": -2,
@@ -1933,9 +1945,9 @@
       "severity": "error"
     },
     {
-      "line": 132,
+      "line": 135,
       "column": 13,
-      "stop_line": 132,
+      "stop_line": 135,
       "stop_column": 43,
       "path": "src/otx/backend/native/models/classification/hlabel_models/vit.py",
       "code": -2,
@@ -1981,9 +1993,9 @@
       "severity": "error"
     },
     {
-      "line": 166,
+      "line": 168,
       "column": 53,
-      "stop_line": 166,
+      "stop_line": 168,
       "stop_column": 59,
       "path": "src/otx/backend/native/models/classification/multiclass_models/base.py",
       "code": -2,
@@ -1993,9 +2005,9 @@
       "severity": "error"
     },
     {
-      "line": 168,
+      "line": 170,
       "column": 9,
-      "stop_line": 168,
+      "stop_line": 170,
       "stop_column": 28,
       "path": "src/otx/backend/native/models/classification/multiclass_models/base.py",
       "code": -2,
@@ -2185,9 +2197,9 @@
       "severity": "error"
     },
     {
-      "line": 155,
+      "line": 154,
       "column": 9,
-      "stop_line": 155,
+      "stop_line": 154,
       "stop_column": 28,
       "path": "src/otx/backend/native/models/classification/multilabel_models/base.py",
       "code": -2,
@@ -2197,9 +2209,9 @@
       "severity": "error"
     },
     {
-      "line": 163,
+      "line": 165,
       "column": 53,
-      "stop_line": 163,
+      "stop_line": 165,
       "stop_column": 59,
       "path": "src/otx/backend/native/models/classification/multilabel_models/base.py",
       "code": -2,
@@ -2209,9 +2221,9 @@
       "severity": "error"
     },
     {
-      "line": 117,
+      "line": 120,
       "column": 72,
-      "stop_line": 117,
+      "stop_line": 120,
       "stop_column": 82,
       "path": "src/otx/backend/native/models/classification/multilabel_models/vit.py",
       "code": -2,
@@ -2221,9 +2233,9 @@
       "severity": "error"
     },
     {
-      "line": 118,
+      "line": 121,
       "column": 13,
-      "stop_line": 118,
+      "stop_line": 121,
       "stop_column": 43,
       "path": "src/otx/backend/native/models/classification/multilabel_models/vit.py",
       "code": -2,
@@ -2420,7 +2432,7 @@
       "path": "src/otx/backend/native/models/classification/utils/peft.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `Literal[2]` and `Module`\n  Expected `__rmul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `Literal[2]` and `Module`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__mul__`",
       "concise_description": "`*` is not supported between `Literal[2]` and `Module`",
       "severity": "error"
     },
@@ -2456,7 +2468,7 @@
       "path": "src/otx/backend/native/models/classification/utils/peft.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`+` is not supported between `Tensor` and `Module`\n  Expected `__radd__` to be a callable, got `Module | Tensor`",
+      "description": "`+` is not supported between `Tensor` and `Module`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._C.TensorBase.__add__`",
       "concise_description": "`+` is not supported between `Tensor` and `Module`",
       "severity": "error"
     },
@@ -2624,7 +2636,7 @@
       "path": "src/otx/backend/native/models/common/backbones/pytorchcv_backbones.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`+` is not supported between `Module` and `Literal[1]`\n  Expected `__add__` to be a callable, got `Module | Tensor`",
+      "description": "`+` is not supported between `Module` and `Literal[1]`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__radd__`",
       "concise_description": "`+` is not supported between `Module` and `Literal[1]`",
       "severity": "error"
     },
@@ -3092,7 +3104,7 @@
       "path": "src/otx/backend/native/models/common/layers/res_layer.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `int` and `Module`\n  Expected `__rmul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `int` and `Module`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__mul__`",
       "concise_description": "`*` is not supported between `int` and `Module`",
       "severity": "error"
     },
@@ -3116,8 +3128,8 @@
       "path": "src/otx/backend/native/models/common/layers/res_layer.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Tensor | Unknown` is not assignable to parameter `out_channels` with type `int` in function `torch.nn.modules.conv.Conv2d.__init__`",
-      "concise_description": "Argument `Tensor | Unknown` is not assignable to parameter `out_channels` with type `int` in function `torch.nn.modules.conv.Conv2d.__init__`",
+      "description": "Argument `Tensor | int` is not assignable to parameter `out_channels` with type `int` in function `torch.nn.modules.conv.Conv2d.__init__`",
+      "concise_description": "Argument `Tensor | int` is not assignable to parameter `out_channels` with type `int` in function `torch.nn.modules.conv.Conv2d.__init__`",
       "severity": "error"
     },
     {
@@ -3128,7 +3140,7 @@
       "path": "src/otx/backend/native/models/common/layers/res_layer.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `int` and `Module`\n  Expected `__rmul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `int` and `Module`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__mul__`",
       "concise_description": "`*` is not supported between `int` and `Module`",
       "severity": "error"
     },
@@ -3140,8 +3152,8 @@
       "path": "src/otx/backend/native/models/common/layers/res_layer.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Tensor | Unknown` is not assignable to parameter `num_features` with type `int` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Argument `Tensor | Unknown` is not assignable to parameter `num_features` with type `int` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "description": "Argument `Tensor | int` is not assignable to parameter `num_features` with type `int` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Argument `Tensor | int` is not assignable to parameter `num_features` with type `int` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
@@ -3152,7 +3164,7 @@
       "path": "src/otx/backend/native/models/common/layers/res_layer.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `int` and `Module`\n  Expected `__rmul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `int` and `Module`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__mul__`",
       "concise_description": "`*` is not supported between `int` and `Module`",
       "severity": "error"
     },
@@ -3164,8 +3176,8 @@
       "path": "src/otx/backend/native/models/common/layers/res_layer.py",
       "code": -2,
       "name": "bad-assignment",
-      "description": "`Tensor | Unknown` is not assignable to variable `inplanes` with type `int`",
-      "concise_description": "`Tensor | Unknown` is not assignable to variable `inplanes` with type `int`",
+      "description": "`Tensor | int` is not assignable to variable `inplanes` with type `int`",
+      "concise_description": "`Tensor | int` is not assignable to variable `inplanes` with type `int`",
       "severity": "error"
     },
     {
@@ -3176,7 +3188,7 @@
       "path": "src/otx/backend/native/models/common/layers/res_layer.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `int` and `Module`\n  Expected `__rmul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `int` and `Module`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__mul__`",
       "concise_description": "`*` is not supported between `int` and `Module`",
       "severity": "error"
     },
@@ -3236,7 +3248,7 @@
       "path": "src/otx/backend/native/models/common/layers/transformer_layers.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `Parameter` and `Module`\n  Expected `__rmul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `Parameter` and `Module`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._C.TensorBase.__mul__`",
       "concise_description": "`*` is not supported between `Parameter` and `Module`",
       "severity": "error"
     },
@@ -4184,8 +4196,8 @@
       "path": "src/otx/backend/native/models/common/utils/utils.py",
       "code": -2,
       "name": "bad-assignment",
-      "description": "`Literal[0] | Tensor` is not assignable to `int` (caused by inconsistent types when breaking cycles)",
-      "concise_description": "`Literal[0] | Tensor` is not assignable to `int` (caused by inconsistent types when breaking cycles)",
+      "description": "Pyrefly detected conflicting types while breaking a dependency cycle: `Literal[0] | Tensor` is not assignable to `int`. Adding explicit type annotations might possibly help.",
+      "concise_description": "Pyrefly detected conflicting types while breaking a dependency cycle: `Literal[0] | Tensor` is not assignable to `int`. Adding explicit type annotations might possibly help.",
       "severity": "error"
     },
     {
@@ -4345,9 +4357,9 @@
       "severity": "error"
     },
     {
-      "line": 519,
+      "line": 520,
       "column": 21,
-      "stop_line": 519,
+      "stop_line": 520,
       "stop_column": 31,
       "path": "src/otx/backend/native/models/detection/backbones/hgnetv2.py",
       "code": -2,
@@ -4357,9 +4369,9 @@
       "severity": "error"
     },
     {
-      "line": 520,
+      "line": 521,
       "column": 21,
-      "stop_line": 520,
+      "stop_line": 521,
       "stop_column": 32,
       "path": "src/otx/backend/native/models/detection/backbones/hgnetv2.py",
       "code": -2,
@@ -4424,8 +4436,8 @@
       "path": "src/otx/backend/native/models/detection/backbones/presnet.py",
       "code": -2,
       "name": "bad-assignment",
-      "description": "`Tensor | Unknown` is not assignable to variable `ch_in` with type `int`",
-      "concise_description": "`Tensor | Unknown` is not assignable to variable `ch_in` with type `int`",
+      "description": "`Tensor | int` is not assignable to variable `ch_in` with type `int`",
+      "concise_description": "`Tensor | int` is not assignable to variable `ch_in` with type `int`",
       "severity": "error"
     },
     {
@@ -4436,7 +4448,7 @@
       "path": "src/otx/backend/native/models/detection/backbones/presnet.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `int` and `Module`\n  Expected `__rmul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `int` and `Module`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__mul__`",
       "concise_description": "`*` is not supported between `int` and `Module`",
       "severity": "error"
     },
@@ -4484,7 +4496,7 @@
       "path": "src/otx/backend/native/models/detection/backbones/presnet.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `Module` and `int`\n  Expected `__mul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `Module` and `int`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__rmul__`",
       "concise_description": "`*` is not supported between `Module` and `int`",
       "severity": "error"
     },
@@ -4573,9 +4585,9 @@
       "severity": "error"
     },
     {
-      "line": 95,
+      "line": 93,
       "column": 40,
-      "stop_line": 95,
+      "stop_line": 93,
       "stop_column": 57,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4585,9 +4597,9 @@
       "severity": "error"
     },
     {
-      "line": 96,
+      "line": 94,
       "column": 33,
-      "stop_line": 96,
+      "stop_line": 94,
       "stop_column": 54,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4597,21 +4609,9 @@
       "severity": "error"
     },
     {
-      "line": 122,
-      "column": 51,
-      "stop_line": 122,
-      "stop_column": 56,
-      "path": "src/otx/backend/native/models/detection/base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `OTXPredictionBatch | Tensor` is not assignable to parameter `outputs` with type `OTXPredictionBatch` in function `OTXDetectionModel._filter_outputs_by_threshold`",
-      "concise_description": "Argument `OTXPredictionBatch | Tensor` is not assignable to parameter `outputs` with type `OTXPredictionBatch` in function `OTXDetectionModel._filter_outputs_by_threshold`",
-      "severity": "error"
-    },
-    {
-      "line": 166,
+      "line": 172,
       "column": 47,
-      "stop_line": 166,
+      "stop_line": 172,
       "stop_column": 65,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4621,9 +4621,9 @@
       "severity": "error"
     },
     {
-      "line": 166,
+      "line": 172,
       "column": 47,
-      "stop_line": 166,
+      "stop_line": 172,
       "stop_column": 65,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4633,9 +4633,9 @@
       "severity": "error"
     },
     {
-      "line": 166,
+      "line": 172,
       "column": 67,
-      "stop_line": 166,
+      "stop_line": 172,
       "stop_column": 71,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4645,9 +4645,9 @@
       "severity": "error"
     },
     {
-      "line": 166,
+      "line": 172,
       "column": 67,
-      "stop_line": 166,
+      "stop_line": 172,
       "stop_column": 71,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4657,9 +4657,9 @@
       "severity": "error"
     },
     {
-      "line": 170,
+      "line": 176,
       "column": 26,
-      "stop_line": 170,
+      "stop_line": 176,
       "stop_column": 32,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4669,9 +4669,9 @@
       "severity": "error"
     },
     {
-      "line": 174,
+      "line": 180,
       "column": 9,
-      "stop_line": 174,
+      "stop_line": 180,
       "stop_column": 26,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4681,9 +4681,9 @@
       "severity": "error"
     },
     {
-      "line": 206,
+      "line": 202,
       "column": 33,
-      "stop_line": 206,
+      "stop_line": 202,
       "stop_column": 39,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4693,9 +4693,9 @@
       "severity": "error"
     },
     {
-      "line": 288,
+      "line": 284,
       "column": 31,
-      "stop_line": 288,
+      "stop_line": 284,
       "stop_column": 37,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4705,45 +4705,33 @@
       "severity": "error"
     },
     {
-      "line": 293,
+      "line": 291,
       "column": 20,
-      "stop_line": 293,
-      "stop_column": 72,
+      "stop_line": 291,
+      "stop_column": 73,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[Tensor | ndarray[Unknown, Unknown]]` is not assignable to parameter `images` with type `Tensor | list[Tensor]` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
-      "concise_description": "Argument `list[Tensor | ndarray[Unknown, Unknown]]` is not assignable to parameter `images` with type `Tensor | list[Tensor]` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
+      "description": "Argument `list[Tensor | None]` is not assignable to parameter `scores` with type `list[Tensor] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
+      "concise_description": "Argument `list[Tensor | None]` is not assignable to parameter `scores` with type `list[Tensor] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 295,
+      "line": 292,
       "column": 20,
-      "stop_line": 295,
+      "stop_line": 292,
       "stop_column": 73,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[Tensor | None]` is not assignable to parameter `scores` with type `list[Tensor] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
-      "concise_description": "Argument `list[Tensor | None]` is not assignable to parameter `scores` with type `list[Tensor] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
+      "description": "Argument `list[BoundingBoxes | None]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
+      "concise_description": "Argument `list[BoundingBoxes | None]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 296,
-      "column": 20,
-      "stop_line": 296,
-      "stop_column": 73,
-      "path": "src/otx/backend/native/models/detection/base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[BoundingBoxes | None]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
-      "concise_description": "Argument `list[BoundingBoxes | None]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 297,
+      "line": 293,
       "column": 20,
-      "stop_line": 297,
+      "stop_line": 293,
       "stop_column": 72,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4753,9 +4741,9 @@
       "severity": "error"
     },
     {
-      "line": 300,
+      "line": 296,
       "column": 40,
-      "stop_line": 300,
+      "stop_line": 296,
       "stop_column": 99,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4765,9 +4753,9 @@
       "severity": "error"
     },
     {
-      "line": 301,
+      "line": 297,
       "column": 42,
-      "stop_line": 301,
+      "stop_line": 297,
       "stop_column": 103,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4777,21 +4765,21 @@
       "severity": "error"
     },
     {
-      "line": 305,
+      "line": 301,
       "column": 9,
-      "stop_line": 305,
+      "stop_line": 301,
       "stop_column": 28,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
       "name": "bad-override",
-      "description": "Class member `OTXDetectionModel.forward_for_tracing` overrides parent class `OTXModel` in an inconsistent manner\n  `OTXDetectionModel.forward_for_tracing` has type `BoundMethod[OTXDetectionModel, (self: OTXDetectionModel, inputs: Tensor) -> list[InstanceData]]`, which is not assignable to `BoundMethod[OTXDetectionModel, (self: OTXDetectionModel, *args: Unknown, **kwargs: Unknown) -> Tensor | dict[str, Tensor]]`, the type of `OTXModel.forward_for_tracing`",
+      "description": "Class member `OTXDetectionModel.forward_for_tracing` overrides parent class `OTXModel` in an inconsistent manner\n  `OTXDetectionModel.forward_for_tracing` has type `BoundMethod[OTXDetectionModel, (self: OTXDetectionModel, inputs: Tensor) -> dict[str, Any] | tuple[Tensor, ...]]`, which is not assignable to `BoundMethod[OTXDetectionModel, (self: OTXDetectionModel, *args: Unknown, **kwargs: Unknown) -> Tensor | dict[str, Tensor]]`, the type of `OTXModel.forward_for_tracing`",
       "concise_description": "Class member `OTXDetectionModel.forward_for_tracing` overrides parent class `OTXModel` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 315,
+      "line": 311,
       "column": 16,
-      "stop_line": 315,
+      "stop_line": 311,
       "stop_column": 33,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4801,9 +4789,9 @@
       "severity": "error"
     },
     {
-      "line": 351,
+      "line": 347,
       "column": 9,
-      "stop_line": 351,
+      "stop_line": 347,
       "stop_column": 27,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4813,33 +4801,21 @@
       "severity": "error"
     },
     {
-      "line": 421,
-      "column": 31,
-      "stop_line": 421,
-      "stop_column": 40,
-      "path": "src/otx/backend/native/models/detection/base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Size` is not assignable to parameter `img_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "concise_description": "Argument `Size` is not assignable to parameter `img_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "severity": "error"
-    },
-    {
-      "line": 422,
-      "column": 31,
-      "stop_line": 422,
-      "stop_column": 40,
+      "line": 410,
+      "column": 27,
+      "stop_line": 410,
+      "stop_column": 80,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Size` is not assignable to parameter `ori_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "concise_description": "Argument `Size` is not assignable to parameter `ori_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[int, int], ori_shape=tuple[int, int])",
       "severity": "error"
     },
     {
-      "line": 434,
+      "line": 420,
       "column": 40,
-      "stop_line": 434,
+      "stop_line": 420,
       "stop_column": 57,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4849,9 +4825,9 @@
       "severity": "error"
     },
     {
-      "line": 435,
+      "line": 421,
       "column": 33,
-      "stop_line": 435,
+      "stop_line": 421,
       "stop_column": 54,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4861,9 +4837,9 @@
       "severity": "error"
     },
     {
-      "line": 439,
+      "line": 425,
       "column": 45,
-      "stop_line": 439,
+      "stop_line": 425,
       "stop_column": 55,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4873,9 +4849,9 @@
       "severity": "error"
     },
     {
-      "line": 441,
+      "line": 427,
       "column": 50,
-      "stop_line": 441,
+      "stop_line": 427,
       "stop_column": 60,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4885,9 +4861,9 @@
       "severity": "error"
     },
     {
-      "line": 444,
+      "line": 430,
       "column": 13,
-      "stop_line": 446,
+      "stop_line": 432,
       "stop_column": 40,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4897,21 +4873,9 @@
       "severity": "error"
     },
     {
-      "line": 456,
-      "column": 43,
-      "stop_line": 456,
-      "stop_column": 56,
-      "path": "src/otx/backend/native/models/detection/base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `batch_inputs` with type `Tensor` in function `otx.backend.native.models.detection.detectors.single_stage_detector.SingleStageDetector.extract_feat`",
-      "concise_description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `batch_inputs` with type `Tensor` in function `otx.backend.native.models.detection.detectors.single_stage_detector.SingleStageDetector.extract_feat`",
-      "severity": "error"
-    },
-    {
-      "line": 460,
+      "line": 446,
       "column": 26,
-      "stop_line": 460,
+      "stop_line": 446,
       "stop_column": 48,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4921,9 +4885,9 @@
       "severity": "error"
     },
     {
-      "line": 461,
+      "line": 447,
       "column": 24,
-      "stop_line": 461,
+      "stop_line": 447,
       "stop_column": 39,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4933,9 +4897,9 @@
       "severity": "error"
     },
     {
-      "line": 464,
+      "line": 450,
       "column": 27,
-      "stop_line": 464,
+      "stop_line": 450,
       "stop_column": 49,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4945,9 +4909,9 @@
       "severity": "error"
     },
     {
-      "line": 513,
+      "line": 499,
       "column": 33,
-      "stop_line": 513,
+      "stop_line": 499,
       "stop_column": 54,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4957,9 +4921,9 @@
       "severity": "error"
     },
     {
-      "line": 532,
+      "line": 518,
       "column": 39,
-      "stop_line": 532,
+      "stop_line": 518,
       "stop_column": 43,
       "path": "src/otx/backend/native/models/detection/base.py",
       "code": -2,
@@ -4969,9 +4933,9 @@
       "severity": "error"
     },
     {
-      "line": 112,
+      "line": 105,
       "column": 30,
-      "stop_line": 112,
+      "stop_line": 105,
       "stop_column": 52,
       "path": "src/otx/backend/native/models/detection/detectors/detection_transformer.py",
       "code": -2,
@@ -4981,9 +4945,9 @@
       "severity": "error"
     },
     {
-      "line": 113,
+      "line": 106,
       "column": 28,
-      "stop_line": 113,
+      "stop_line": 106,
       "stop_column": 43,
       "path": "src/otx/backend/native/models/detection/detectors/detection_transformer.py",
       "code": -2,
@@ -4993,9 +4957,9 @@
       "severity": "error"
     },
     {
-      "line": 137,
+      "line": 130,
       "column": 22,
-      "stop_line": 137,
+      "stop_line": 130,
       "stop_column": 78,
       "path": "src/otx/backend/native/models/detection/detectors/detection_transformer.py",
       "code": -2,
@@ -5005,9 +4969,9 @@
       "severity": "error"
     },
     {
-      "line": 170,
+      "line": 163,
       "column": 77,
-      "stop_line": 170,
+      "stop_line": 163,
       "stop_column": 81,
       "path": "src/otx/backend/native/models/detection/detectors/detection_transformer.py",
       "code": -2,
@@ -5017,9 +4981,9 @@
       "severity": "error"
     },
     {
-      "line": 183,
+      "line": 176,
       "column": 30,
-      "stop_line": 183,
+      "stop_line": 176,
       "stop_column": 76,
       "path": "src/otx/backend/native/models/detection/detectors/detection_transformer.py",
       "code": -2,
@@ -5052,18 +5016,6 @@
       "concise_description": "Argument `OTXSampleBatch` is not assignable to parameter `batch_inputs` with type `Tensor` in function `SingleStageDetector._forward`",
       "severity": "error"
     },
-    {
-      "line": 151,
-      "column": 31,
-      "stop_line": 151,
-      "stop_column": 44,
-      "path": "src/otx/backend/native/models/detection/detectors/single_stage_detector.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `batch_inputs` with type `Tensor` in function `SingleStageDetector.extract_feat`",
-      "concise_description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `batch_inputs` with type `Tensor` in function `SingleStageDetector.extract_feat`",
-      "severity": "error"
-    },
     {
       "line": 153,
       "column": 38,
@@ -5076,18 +5028,6 @@
       "concise_description": "Expected a callable, got `Tensor`",
       "severity": "error"
     },
-    {
-      "line": 185,
-      "column": 31,
-      "stop_line": 185,
-      "stop_column": 44,
-      "path": "src/otx/backend/native/models/detection/detectors/single_stage_detector.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `batch_inputs` with type `Tensor` in function `SingleStageDetector.extract_feat`",
-      "concise_description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `batch_inputs` with type `Tensor` in function `SingleStageDetector.extract_feat`",
-      "severity": "error"
-    },
     {
       "line": 186,
       "column": 16,
@@ -6512,7 +6452,7 @@
       "path": "src/otx/backend/native/models/detection/heads/dfine_decoder.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `Module` and `Tensor`\n  Expected `__mul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `Module` and `Tensor`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._C.TensorBase.__rmul__`",
       "concise_description": "`*` is not supported between `Module` and `Tensor`",
       "severity": "error"
     },
@@ -7556,7 +7496,7 @@
       "path": "src/otx/backend/native/models/detection/layers/csp_layer.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`+` is not supported between `Module` and `Module`\n  Expected `__add__` to be a callable, got `Module | Tensor`",
+      "description": "`+` is not supported between `Module` and `Module`\n  Cannot find `__add__` or `__radd__`",
       "concise_description": "`+` is not supported between `Module` and `Module`",
       "severity": "error"
     },
@@ -7568,7 +7508,7 @@
       "path": "src/otx/backend/native/models/detection/layers/csp_layer.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`+` is not supported between `Module` and `Tensor`\n  Expected `__add__` to be a callable, got `Module | Tensor`",
+      "description": "`+` is not supported between `Module` and `Tensor`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._C.TensorBase.__radd__`",
       "concise_description": "`+` is not supported between `Module` and `Tensor`",
       "severity": "error"
     },
@@ -7580,7 +7520,7 @@
       "path": "src/otx/backend/native/models/detection/layers/csp_layer.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`+` is not supported between `Tensor` and `Module`\n  Expected `__radd__` to be a callable, got `Module | Tensor`",
+      "description": "`+` is not supported between `Tensor` and `Module`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._C.TensorBase.__add__`",
       "concise_description": "`+` is not supported between `Tensor` and `Module`",
       "severity": "error"
     },
@@ -7592,7 +7532,7 @@
       "path": "src/otx/backend/native/models/detection/layers/csp_layer.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`/` is not supported between `Module` and `Tensor`\n  Expected `__truediv__` to be a callable, got `Module | Tensor`",
+      "description": "`/` is not supported between `Module` and `Tensor`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._tensor.Tensor.__rdiv__`",
       "concise_description": "`/` is not supported between `Module` and `Tensor`",
       "severity": "error"
     },
@@ -7616,7 +7556,7 @@
       "path": "src/otx/backend/native/models/detection/layers/csp_layer.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`-` is not supported between `Module` and `Tensor`\n  Expected `__sub__` to be a callable, got `Module | Tensor`",
+      "description": "`-` is not supported between `Module` and `Tensor`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._tensor.Tensor.__rsub__`",
       "concise_description": "`-` is not supported between `Module` and `Tensor`",
       "severity": "error"
     },
@@ -7628,7 +7568,7 @@
       "path": "src/otx/backend/native/models/detection/layers/csp_layer.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `Module` and `Module`\n  Expected `__mul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `Module` and `Module`\n  Cannot find `__mul__` or `__rmul__`",
       "concise_description": "`*` is not supported between `Module` and `Module`",
       "severity": "error"
     },
@@ -7640,7 +7580,7 @@
       "path": "src/otx/backend/native/models/detection/layers/csp_layer.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `Module` and `Tensor`\n  Expected `__mul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `Module` and `Tensor`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._C.TensorBase.__rmul__`",
       "concise_description": "`*` is not supported between `Module` and `Tensor`",
       "severity": "error"
     },
@@ -7652,7 +7592,7 @@
       "path": "src/otx/backend/native/models/detection/layers/csp_layer.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `Tensor` and `Module`\n  Expected `__rmul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `Tensor` and `Module`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._C.TensorBase.__mul__`",
       "concise_description": "`*` is not supported between `Tensor` and `Module`",
       "severity": "error"
     },
@@ -8461,21 +8401,9 @@
       "severity": "error"
     },
     {
-      "line": 158,
-      "column": 42,
-      "stop_line": 158,
-      "stop_column": 55,
-      "path": "src/otx/backend/native/models/detection/rtdetr.py",
-      "code": -2,
-      "name": "unbound-name",
-      "description": "`scaled_bboxes` may be uninitialized",
-      "concise_description": "`scaled_bboxes` may be uninitialized",
-      "severity": "error"
-    },
-    {
-      "line": 168,
+      "line": 171,
       "column": 9,
-      "stop_line": 168,
+      "stop_line": 171,
       "stop_column": 27,
       "path": "src/otx/backend/native/models/detection/rtdetr.py",
       "code": -2,
@@ -8485,9 +8413,9 @@
       "severity": "error"
     },
     {
-      "line": 180,
+      "line": 183,
       "column": 33,
-      "stop_line": 180,
+      "stop_line": 183,
       "stop_column": 39,
       "path": "src/otx/backend/native/models/detection/rtdetr.py",
       "code": -2,
@@ -8497,9 +8425,9 @@
       "severity": "error"
     },
     {
-      "line": 189,
+      "line": 192,
       "column": 34,
-      "stop_line": 189,
+      "stop_line": 192,
       "stop_column": 56,
       "path": "src/otx/backend/native/models/detection/rtdetr.py",
       "code": -2,
@@ -8509,21 +8437,9 @@
       "severity": "error"
     },
     {
-      "line": 222,
-      "column": 9,
-      "stop_line": 222,
-      "stop_column": 29,
-      "path": "src/otx/backend/native/models/detection/rtdetr.py",
-      "code": -2,
-      "name": "bad-override",
-      "description": "Class member `RTDETR.configure_optimizers` overrides parent class `OTXDetectionModel` in an inconsistent manner\n  `RTDETR.configure_optimizers` has type `BoundMethod[RTDETR, (self: RTDETR) -> tuple[list[Optimizer], list[dict[str, Any]]]]`, which is not assignable to `BoundMethod[RTDETR, (self: RTDETR) -> OptimizerLRScheduler]`, the type of `OTXDetectionModel.configure_optimizers`",
-      "concise_description": "Class member `RTDETR.configure_optimizers` overrides parent class `OTXDetectionModel` in an inconsistent manner",
-      "severity": "error"
-    },
-    {
-      "line": 234,
+      "line": 237,
       "column": 47,
-      "stop_line": 234,
+      "stop_line": 237,
       "stop_column": 81,
       "path": "src/otx/backend/native/models/detection/rtdetr.py",
       "code": -2,
@@ -8533,9 +8449,9 @@
       "severity": "error"
     },
     {
-      "line": 268,
+      "line": 271,
       "column": 20,
-      "stop_line": 268,
+      "stop_line": 271,
       "stop_column": 38,
       "path": "src/otx/backend/native/models/detection/rtdetr.py",
       "code": -2,
@@ -8545,9 +8461,9 @@
       "severity": "error"
     },
     {
-      "line": 274,
+      "line": 277,
       "column": 19,
-      "stop_line": 274,
+      "stop_line": 277,
       "stop_column": 22,
       "path": "src/otx/backend/native/models/detection/rtdetr.py",
       "code": -2,
@@ -8593,9 +8509,9 @@
       "severity": "error"
     },
     {
-      "line": 274,
+      "line": 277,
       "column": 20,
-      "stop_line": 274,
+      "stop_line": 277,
       "stop_column": 44,
       "path": "src/otx/backend/native/models/detection/ssd.py",
       "code": -2,
@@ -8605,9 +8521,9 @@
       "severity": "error"
     },
     {
-      "line": 365,
+      "line": 368,
       "column": 9,
-      "stop_line": 365,
+      "stop_line": 368,
       "stop_column": 27,
       "path": "src/otx/backend/native/models/detection/ssd.py",
       "code": -2,
@@ -8617,9 +8533,9 @@
       "severity": "error"
     },
     {
-      "line": 368,
+      "line": 371,
       "column": 32,
-      "stop_line": 368,
+      "stop_line": 371,
       "stop_column": 69,
       "path": "src/otx/backend/native/models/detection/ssd.py",
       "code": -2,
@@ -8701,9 +8617,9 @@
       "severity": "error"
     },
     {
-      "line": 190,
+      "line": 191,
       "column": 30,
-      "stop_line": 190,
+      "stop_line": 191,
       "stop_column": 54,
       "path": "src/otx/backend/native/models/detection/yolox.py",
       "code": -2,
@@ -8713,9 +8629,9 @@
       "severity": "error"
     },
     {
-      "line": 192,
+      "line": 193,
       "column": 13,
-      "stop_line": 192,
+      "stop_line": 193,
       "stop_column": 37,
       "path": "src/otx/backend/native/models/detection/yolox.py",
       "code": -2,
@@ -8725,9 +8641,9 @@
       "severity": "error"
     },
     {
-      "line": 192,
+      "line": 193,
       "column": 48,
-      "stop_line": 192,
+      "stop_line": 193,
       "stop_column": 72,
       "path": "src/otx/backend/native/models/detection/yolox.py",
       "code": -2,
@@ -8737,9 +8653,9 @@
       "severity": "error"
     },
     {
-      "line": 195,
+      "line": 196,
       "column": 13,
-      "stop_line": 195,
+      "stop_line": 196,
       "stop_column": 37,
       "path": "src/otx/backend/native/models/detection/yolox.py",
       "code": -2,
@@ -8881,9 +8797,9 @@
       "severity": "error"
     },
     {
-      "line": 98,
+      "line": 96,
       "column": 40,
-      "stop_line": 98,
+      "stop_line": 96,
       "stop_column": 57,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -8893,9 +8809,9 @@
       "severity": "error"
     },
     {
-      "line": 99,
+      "line": 97,
       "column": 33,
-      "stop_line": 99,
+      "stop_line": 97,
       "stop_column": 54,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -8905,9 +8821,9 @@
       "severity": "error"
     },
     {
-      "line": 100,
+      "line": 98,
       "column": 44,
-      "stop_line": 100,
+      "stop_line": 98,
       "stop_column": 70,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -8917,9 +8833,9 @@
       "severity": "error"
     },
     {
-      "line": 104,
+      "line": 102,
       "column": 20,
-      "stop_line": 104,
+      "stop_line": 102,
       "stop_column": 37,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -8929,9 +8845,9 @@
       "severity": "error"
     },
     {
-      "line": 110,
+      "line": 108,
       "column": 39,
-      "stop_line": 110,
+      "stop_line": 108,
       "stop_column": 53,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -8941,9 +8857,9 @@
       "severity": "error"
     },
     {
-      "line": 114,
+      "line": 112,
       "column": 9,
-      "stop_line": 114,
+      "stop_line": 112,
       "stop_column": 26,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -8953,9 +8869,9 @@
       "severity": "error"
     },
     {
-      "line": 138,
+      "line": 136,
       "column": 41,
-      "stop_line": 138,
+      "stop_line": 136,
       "stop_column": 82,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -8965,9 +8881,9 @@
       "severity": "error"
     },
     {
-      "line": 151,
+      "line": 149,
       "column": 41,
-      "stop_line": 155,
+      "stop_line": 153,
       "stop_column": 18,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -8977,9 +8893,9 @@
       "severity": "error"
     },
     {
-      "line": 157,
+      "line": 155,
       "column": 43,
-      "stop_line": 160,
+      "stop_line": 158,
       "stop_column": 14,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -8989,9 +8905,9 @@
       "severity": "error"
     },
     {
-      "line": 186,
+      "line": 184,
       "column": 24,
-      "stop_line": 186,
+      "stop_line": 184,
       "stop_column": 30,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9001,9 +8917,9 @@
       "severity": "error"
     },
     {
-      "line": 197,
+      "line": 195,
       "column": 20,
-      "stop_line": 197,
+      "stop_line": 195,
       "stop_column": 26,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9013,9 +8929,9 @@
       "severity": "error"
     },
     {
-      "line": 222,
+      "line": 220,
       "column": 31,
-      "stop_line": 222,
+      "stop_line": 220,
       "stop_column": 37,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9028,30 +8944,18 @@
       "line": 227,
       "column": 20,
       "stop_line": 227,
-      "stop_column": 72,
+      "stop_column": 73,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[Tensor | ndarray[Unknown, Unknown]]` is not assignable to parameter `images` with type `Tensor | list[Tensor]` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
-      "concise_description": "Argument `list[Tensor | ndarray[Unknown, Unknown]]` is not assignable to parameter `images` with type `Tensor | list[Tensor]` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
+      "description": "Argument `list[Tensor | None]` is not assignable to parameter `scores` with type `list[Tensor] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
+      "concise_description": "Argument `list[Tensor | None]` is not assignable to parameter `scores` with type `list[Tensor] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 229,
+      "line": 228,
       "column": 20,
-      "stop_line": 229,
-      "stop_column": 73,
-      "path": "src/otx/backend/native/models/instance_segmentation/base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor | None]` is not assignable to parameter `scores` with type `list[Tensor] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
-      "concise_description": "Argument `list[Tensor | None]` is not assignable to parameter `scores` with type `list[Tensor] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 230,
-      "column": 20,
-      "stop_line": 230,
+      "stop_line": 228,
       "stop_column": 73,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9061,9 +8965,9 @@
       "severity": "error"
     },
     {
-      "line": 231,
+      "line": 229,
       "column": 20,
-      "stop_line": 231,
+      "stop_line": 229,
       "stop_column": 72,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9073,9 +8977,9 @@
       "severity": "error"
     },
     {
-      "line": 232,
+      "line": 230,
       "column": 19,
-      "stop_line": 232,
+      "stop_line": 230,
       "stop_column": 71,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9085,9 +8989,9 @@
       "severity": "error"
     },
     {
-      "line": 235,
+      "line": 233,
       "column": 40,
-      "stop_line": 235,
+      "stop_line": 233,
       "stop_column": 99,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9097,9 +9001,9 @@
       "severity": "error"
     },
     {
-      "line": 236,
+      "line": 234,
       "column": 42,
-      "stop_line": 236,
+      "stop_line": 234,
       "stop_column": 103,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9109,21 +9013,21 @@
       "severity": "error"
     },
     {
-      "line": 240,
+      "line": 238,
       "column": 9,
-      "stop_line": 240,
+      "stop_line": 238,
       "stop_column": 28,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
       "name": "bad-override",
-      "description": "Class member `OTXInstanceSegModel.forward_for_tracing` overrides parent class `OTXModel` in an inconsistent manner\n  `OTXInstanceSegModel.forward_for_tracing` has type `BoundMethod[OTXInstanceSegModel, (self: OTXInstanceSegModel, inputs: Tensor) -> tuple[Tensor, ...]]`, which is not assignable to `BoundMethod[OTXInstanceSegModel, (self: OTXInstanceSegModel, *args: Unknown, **kwargs: Unknown) -> Tensor | dict[str, Tensor]]`, the type of `OTXModel.forward_for_tracing`",
+      "description": "Class member `OTXInstanceSegModel.forward_for_tracing` overrides parent class `OTXModel` in an inconsistent manner\n  `OTXInstanceSegModel.forward_for_tracing` has type `BoundMethod[OTXInstanceSegModel, (self: OTXInstanceSegModel, inputs: Tensor) -> dict[str, Any] | tuple[Tensor, ...]]`, which is not assignable to `BoundMethod[OTXInstanceSegModel, (self: OTXInstanceSegModel, *args: Unknown, **kwargs: Unknown) -> Tensor | dict[str, Tensor]]`, the type of `OTXModel.forward_for_tracing`",
       "concise_description": "Class member `OTXInstanceSegModel.forward_for_tracing` overrides parent class `OTXModel` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 250,
+      "line": 248,
       "column": 16,
-      "stop_line": 250,
+      "stop_line": 248,
       "stop_column": 33,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9133,21 +9037,9 @@
       "severity": "error"
     },
     {
-      "line": 299,
-      "column": 51,
-      "stop_line": 299,
-      "stop_column": 56,
-      "path": "src/otx/backend/native/models/instance_segmentation/base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `OTXPredictionBatch | Tensor` is not assignable to parameter `outputs` with type `OTXPredictionBatch` in function `OTXInstanceSegModel._filter_outputs_by_threshold`",
-      "concise_description": "Argument `OTXPredictionBatch | Tensor` is not assignable to parameter `outputs` with type `OTXPredictionBatch` in function `OTXInstanceSegModel._filter_outputs_by_threshold`",
-      "severity": "error"
-    },
-    {
-      "line": 347,
+      "line": 348,
       "column": 9,
-      "stop_line": 347,
+      "stop_line": 348,
       "stop_column": 27,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9157,9 +9049,9 @@
       "severity": "error"
     },
     {
-      "line": 398,
+      "line": 403,
       "column": 39,
-      "stop_line": 398,
+      "stop_line": 403,
       "stop_column": 81,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9169,9 +9061,9 @@
       "severity": "error"
     },
     {
-      "line": 398,
+      "line": 403,
       "column": 40,
-      "stop_line": 398,
+      "stop_line": 403,
       "stop_column": 80,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9181,9 +9073,9 @@
       "severity": "error"
     },
     {
-      "line": 399,
+      "line": 404,
       "column": 27,
-      "stop_line": 399,
+      "stop_line": 404,
       "stop_column": 48,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9193,9 +9085,9 @@
       "severity": "error"
     },
     {
-      "line": 400,
+      "line": 405,
       "column": 27,
-      "stop_line": 400,
+      "stop_line": 405,
       "stop_column": 48,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9205,9 +9097,9 @@
       "severity": "error"
     },
     {
-      "line": 401,
+      "line": 406,
       "column": 27,
-      "stop_line": 401,
+      "stop_line": 406,
       "stop_column": 48,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9217,9 +9109,9 @@
       "severity": "error"
     },
     {
-      "line": 409,
+      "line": 414,
       "column": 26,
-      "stop_line": 409,
+      "stop_line": 414,
       "stop_column": 32,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9229,9 +9121,9 @@
       "severity": "error"
     },
     {
-      "line": 411,
+      "line": 416,
       "column": 25,
-      "stop_line": 411,
+      "stop_line": 416,
       "stop_column": 30,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9241,9 +9133,9 @@
       "severity": "error"
     },
     {
-      "line": 440,
+      "line": 445,
       "column": 30,
-      "stop_line": 440,
+      "stop_line": 445,
       "stop_column": 41,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9253,9 +9145,9 @@
       "severity": "error"
     },
     {
-      "line": 441,
+      "line": 446,
       "column": 60,
-      "stop_line": 441,
+      "stop_line": 446,
       "stop_column": 70,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9265,9 +9157,9 @@
       "severity": "error"
     },
     {
-      "line": 458,
+      "line": 463,
       "column": 30,
-      "stop_line": 458,
+      "stop_line": 463,
       "stop_column": 41,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9276,34 +9168,22 @@
       "concise_description": "Object of class `NoneType` has no attribute `data`",
       "severity": "error"
     },
-    {
-      "line": 473,
-      "column": 31,
-      "stop_line": 473,
-      "stop_column": 40,
-      "path": "src/otx/backend/native/models/instance_segmentation/base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Size` is not assignable to parameter `img_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "concise_description": "Argument `Size` is not assignable to parameter `img_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "severity": "error"
-    },
     {
       "line": 474,
-      "column": 31,
+      "column": 27,
       "stop_line": 474,
-      "stop_column": 40,
+      "stop_column": 80,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Size` is not assignable to parameter `ori_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "concise_description": "Argument `Size` is not assignable to parameter `ori_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[int, int], ori_shape=tuple[int, int])",
       "severity": "error"
     },
     {
-      "line": 484,
+      "line": 482,
       "column": 40,
-      "stop_line": 484,
+      "stop_line": 482,
       "stop_column": 57,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9313,9 +9193,9 @@
       "severity": "error"
     },
     {
-      "line": 485,
+      "line": 483,
       "column": 33,
-      "stop_line": 485,
+      "stop_line": 483,
       "stop_column": 54,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9325,9 +9205,9 @@
       "severity": "error"
     },
     {
-      "line": 488,
+      "line": 486,
       "column": 44,
-      "stop_line": 488,
+      "stop_line": 486,
       "stop_column": 54,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9337,9 +9217,9 @@
       "severity": "error"
     },
     {
-      "line": 490,
+      "line": 488,
       "column": 49,
-      "stop_line": 490,
+      "stop_line": 488,
       "stop_column": 59,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9349,9 +9229,9 @@
       "severity": "error"
     },
     {
-      "line": 494,
+      "line": 492,
       "column": 13,
-      "stop_line": 496,
+      "stop_line": 494,
       "stop_column": 40,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9361,21 +9241,9 @@
       "severity": "error"
     },
     {
-      "line": 507,
-      "column": 95,
-      "stop_line": 507,
-      "stop_column": 108,
-      "path": "src/otx/backend/native/models/instance_segmentation/base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `batch_inputs` with type `Tensor` in function `otx.backend.native.models.instance_segmentation.segmentors.two_stage.TwoStageDetector.extract_feat`",
-      "concise_description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `batch_inputs` with type `Tensor` in function `otx.backend.native.models.instance_segmentation.segmentors.two_stage.TwoStageDetector.extract_feat`",
-      "severity": "error"
-    },
-    {
-      "line": 509,
+      "line": 508,
       "column": 26,
-      "stop_line": 509,
+      "stop_line": 508,
       "stop_column": 48,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9385,9 +9253,9 @@
       "severity": "error"
     },
     {
-      "line": 510,
+      "line": 509,
       "column": 23,
-      "stop_line": 510,
+      "stop_line": 509,
       "stop_column": 49,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9397,9 +9265,9 @@
       "severity": "error"
     },
     {
-      "line": 518,
+      "line": 517,
       "column": 28,
-      "stop_line": 518,
+      "stop_line": 517,
       "stop_column": 43,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9410,20 +9278,8 @@
     },
     {
       "line": 550,
-      "column": 36,
-      "stop_line": 550,
-      "stop_column": 49,
-      "path": "src/otx/backend/native/models/instance_segmentation/base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `tensors` with type `Tensor` in function `torchvision.models.detection.image_list.ImageList.__init__`",
-      "concise_description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `tensors` with type `Tensor` in function `torchvision.models.detection.image_list.ImageList.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 551,
       "column": 28,
-      "stop_line": 551,
+      "stop_line": 550,
       "stop_column": 42,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9433,9 +9289,9 @@
       "severity": "error"
     },
     {
-      "line": 552,
+      "line": 551,
       "column": 29,
-      "stop_line": 552,
+      "stop_line": 551,
       "stop_column": 49,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9445,9 +9301,9 @@
       "severity": "error"
     },
     {
-      "line": 561,
+      "line": 560,
       "column": 20,
-      "stop_line": 561,
+      "stop_line": 560,
       "stop_column": 42,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9457,9 +9313,9 @@
       "severity": "error"
     },
     {
-      "line": 564,
+      "line": 563,
       "column": 20,
-      "stop_line": 564,
+      "stop_line": 563,
       "stop_column": 48,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9469,9 +9325,9 @@
       "severity": "error"
     },
     {
-      "line": 565,
+      "line": 564,
       "column": 28,
-      "stop_line": 565,
+      "stop_line": 564,
       "stop_column": 55,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9481,9 +9337,9 @@
       "severity": "error"
     },
     {
-      "line": 566,
+      "line": 565,
       "column": 16,
-      "stop_line": 566,
+      "stop_line": 565,
       "stop_column": 43,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9493,9 +9349,9 @@
       "severity": "error"
     },
     {
-      "line": 590,
+      "line": 589,
       "column": 33,
-      "stop_line": 590,
+      "stop_line": 589,
       "stop_column": 54,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9505,9 +9361,9 @@
       "severity": "error"
     },
     {
-      "line": 609,
+      "line": 608,
       "column": 39,
-      "stop_line": 609,
+      "stop_line": 608,
       "stop_column": 43,
       "path": "src/otx/backend/native/models/instance_segmentation/base.py",
       "code": -2,
@@ -9524,7 +9380,7 @@
       "path": "src/otx/backend/native/models/instance_segmentation/heads/bbox_head.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `Module` and `int`\n  Expected `__mul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `Module` and `int`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__rmul__`",
       "concise_description": "`*` is not supported between `Module` and `int`",
       "severity": "error"
     },
@@ -9536,8 +9392,8 @@
       "path": "src/otx/backend/native/models/instance_segmentation/heads/bbox_head.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Module | Tensor | Unknown` is not assignable to parameter `out_features` with type `int` in function `torch.nn.modules.linear.Linear.__init__`",
-      "concise_description": "Argument `Module | Tensor | Unknown` is not assignable to parameter `out_features` with type `int` in function `torch.nn.modules.linear.Linear.__init__`",
+      "description": "Argument `Module | Tensor | int` is not assignable to parameter `out_features` with type `int` in function `torch.nn.modules.linear.Linear.__init__`",
+      "concise_description": "Argument `Module | Tensor | int` is not assignable to parameter `out_features` with type `int` in function `torch.nn.modules.linear.Linear.__init__`",
       "severity": "error"
     },
     {
@@ -9692,7 +9548,7 @@
       "path": "src/otx/backend/native/models/instance_segmentation/heads/bbox_head.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "`*` is not supported between `Module` and `int`\n  Expected `__mul__` to be a callable, got `Module | Tensor`",
+      "description": "`*` is not supported between `Module` and `int`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__rmul__`",
       "concise_description": "`*` is not supported between `Module` and `int`",
       "severity": "error"
     },
@@ -9704,8 +9560,8 @@
       "path": "src/otx/backend/native/models/instance_segmentation/heads/bbox_head.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Module | Tensor | Unknown` is not assignable to parameter `out_features` with type `int` in function `torch.nn.modules.linear.Linear.__init__`",
-      "concise_description": "Argument `Module | Tensor | Unknown` is not assignable to parameter `out_features` with type `int` in function `torch.nn.modules.linear.Linear.__init__`",
+      "description": "Argument `Module | Tensor | int` is not assignable to parameter `out_features` with type `int` in function `torch.nn.modules.linear.Linear.__init__`",
+      "concise_description": "Argument `Module | Tensor | int` is not assignable to parameter `out_features` with type `int` in function `torch.nn.modules.linear.Linear.__init__`",
       "severity": "error"
     },
     {
@@ -11472,18 +11328,6 @@
       "concise_description": "Class member `MaskRCNN.forward` overrides parent class `MaskRCNN` in an inconsistent manner",
       "severity": "error"
     },
-    {
-      "line": 39,
-      "column": 32,
-      "stop_line": 39,
-      "stop_column": 45,
-      "path": "src/otx/backend/native/models/instance_segmentation/segmentors/maskrcnn_tv.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `tensors` with type `Tensor` in function `torchvision.models.detection.image_list.ImageList.__init__`",
-      "concise_description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `tensors` with type `Tensor` in function `torchvision.models.detection.image_list.ImageList.__init__`",
-      "severity": "error"
-    },
     {
       "line": 47,
       "column": 27,
@@ -11545,9 +11389,9 @@
       "severity": "error"
     },
     {
-      "line": 94,
+      "line": 96,
       "column": 16,
-      "stop_line": 96,
+      "stop_line": 98,
       "stop_column": 10,
       "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
       "code": -2,
@@ -11557,9 +11401,9 @@
       "severity": "error"
     },
     {
-      "line": 94,
+      "line": 96,
       "column": 17,
-      "stop_line": 94,
+      "stop_line": 96,
       "stop_column": 70,
       "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
       "code": -2,
@@ -11569,9 +11413,9 @@
       "severity": "error"
     },
     {
-      "line": 94,
+      "line": 96,
       "column": 17,
-      "stop_line": 94,
+      "stop_line": 96,
       "stop_column": 70,
       "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
       "code": -2,
@@ -11581,9 +11425,9 @@
       "severity": "error"
     },
     {
-      "line": 129,
+      "line": 131,
       "column": 30,
-      "stop_line": 129,
+      "stop_line": 131,
       "stop_column": 36,
       "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
       "code": -2,
@@ -11593,9 +11437,9 @@
       "severity": "error"
     },
     {
-      "line": 131,
+      "line": 133,
       "column": 33,
-      "stop_line": 131,
+      "stop_line": 133,
       "stop_column": 39,
       "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
       "code": -2,
@@ -11605,21 +11449,9 @@
       "severity": "error"
     },
     {
-      "line": 159,
-      "column": 31,
-      "stop_line": 159,
-      "stop_column": 50,
-      "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `batch_inputs` with type `Tensor` in function `TwoStageDetector.extract_feat`",
-      "concise_description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `batch_inputs` with type `Tensor` in function `TwoStageDetector.extract_feat`",
-      "severity": "error"
-    },
-    {
-      "line": 170,
+      "line": 172,
       "column": 69,
-      "stop_line": 170,
+      "stop_line": 172,
       "stop_column": 102,
       "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
       "code": -2,
@@ -11629,9 +11461,9 @@
       "severity": "error"
     },
     {
-      "line": 187,
+      "line": 189,
       "column": 13,
-      "stop_line": 187,
+      "stop_line": 189,
       "stop_column": 46,
       "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
       "code": -2,
@@ -11641,21 +11473,9 @@
       "severity": "error"
     },
     {
-      "line": 221,
-      "column": 31,
-      "stop_line": 221,
-      "stop_column": 44,
-      "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `batch_inputs` with type `Tensor` in function `TwoStageDetector.extract_feat`",
-      "concise_description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `batch_inputs` with type `Tensor` in function `TwoStageDetector.extract_feat`",
-      "severity": "error"
-    },
-    {
-      "line": 223,
+      "line": 225,
       "column": 28,
-      "stop_line": 223,
+      "stop_line": 225,
       "stop_column": 49,
       "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
       "code": -2,
@@ -11665,9 +11485,9 @@
       "severity": "error"
     },
     {
-      "line": 225,
+      "line": 227,
       "column": 16,
-      "stop_line": 225,
+      "stop_line": 227,
       "stop_column": 37,
       "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
       "code": -2,
@@ -11677,9 +11497,9 @@
       "severity": "error"
     },
     {
-      "line": 254,
+      "line": 256,
       "column": 28,
-      "stop_line": 254,
+      "stop_line": 256,
       "stop_column": 48,
       "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
       "code": -2,
@@ -11689,9 +11509,9 @@
       "severity": "error"
     },
     {
-      "line": 259,
+      "line": 261,
       "column": 33,
-      "stop_line": 259,
+      "stop_line": 261,
       "stop_column": 53,
       "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
       "code": -2,
@@ -11701,9 +11521,9 @@
       "severity": "error"
     },
     {
-      "line": 267,
+      "line": 269,
       "column": 30,
-      "stop_line": 267,
+      "stop_line": 269,
       "stop_column": 52,
       "path": "src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py",
       "code": -2,
@@ -11929,9 +11749,9 @@
       "severity": "error"
     },
     {
-      "line": 67,
+      "line": 66,
       "column": 9,
-      "stop_line": 67,
+      "stop_line": 66,
       "stop_column": 26,
       "path": "src/otx/backend/native/models/keypoint_detection/base.py",
       "code": -2,
@@ -11941,9 +11761,9 @@
       "severity": "error"
     },
     {
-      "line": 133,
+      "line": 138,
       "column": 35,
-      "stop_line": 133,
+      "stop_line": 138,
       "stop_column": 75,
       "path": "src/otx/backend/native/models/keypoint_detection/base.py",
       "code": -2,
@@ -11953,9 +11773,9 @@
       "severity": "error"
     },
     {
-      "line": 169,
+      "line": 174,
       "column": 9,
-      "stop_line": 169,
+      "stop_line": 174,
       "stop_column": 28,
       "path": "src/otx/backend/native/models/keypoint_detection/base.py",
       "code": -2,
@@ -11965,9 +11785,9 @@
       "severity": "error"
     },
     {
-      "line": 188,
+      "line": 193,
       "column": 31,
-      "stop_line": 188,
+      "stop_line": 193,
       "stop_column": 44,
       "path": "src/otx/backend/native/models/keypoint_detection/base.py",
       "code": -2,
@@ -11977,9 +11797,9 @@
       "severity": "error"
     },
     {
-      "line": 189,
+      "line": 194,
       "column": 31,
-      "stop_line": 189,
+      "stop_line": 194,
       "stop_column": 44,
       "path": "src/otx/backend/native/models/keypoint_detection/base.py",
       "code": -2,
@@ -13033,9 +12853,9 @@
       "severity": "error"
     },
     {
-      "line": 81,
+      "line": 80,
       "column": 9,
-      "stop_line": 81,
+      "stop_line": 80,
       "stop_column": 26,
       "path": "src/otx/backend/native/models/segmentation/base.py",
       "code": -2,
@@ -13045,9 +12865,9 @@
       "severity": "error"
     },
     {
-      "line": 89,
+      "line": 88,
       "column": 30,
-      "stop_line": 89,
+      "stop_line": 88,
       "stop_column": 42,
       "path": "src/otx/backend/native/models/segmentation/base.py",
       "code": -2,
@@ -13057,21 +12877,9 @@
       "severity": "error"
     },
     {
-      "line": 251,
-      "column": 32,
-      "stop_line": 251,
-      "stop_column": 84,
-      "path": "src/otx/backend/native/models/segmentation/base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor | ndarray[Unknown, Unknown]]` is not assignable to parameter `tensors` with type `list[Tensor] | tuple[Tensor, ...] | None` in function `torch._C._VariableFunctions.stack`",
-      "concise_description": "Argument `list[Tensor | ndarray[Unknown, Unknown]]` is not assignable to parameter `tensors` with type `list[Tensor] | tuple[Tensor, ...] | None` in function `torch._C._VariableFunctions.stack`",
-      "severity": "error"
-    },
-    {
-      "line": 253,
+      "line": 248,
       "column": 19,
-      "stop_line": 253,
+      "stop_line": 248,
       "stop_column": 71,
       "path": "src/otx/backend/native/models/segmentation/base.py",
       "code": -2,
@@ -13081,9 +12889,9 @@
       "severity": "error"
     },
     {
-      "line": 257,
+      "line": 252,
       "column": 40,
-      "stop_line": 257,
+      "stop_line": 252,
       "stop_column": 99,
       "path": "src/otx/backend/native/models/segmentation/base.py",
       "code": -2,
@@ -13093,9 +12901,9 @@
       "severity": "error"
     },
     {
-      "line": 258,
+      "line": 253,
       "column": 42,
-      "stop_line": 258,
+      "stop_line": 253,
       "stop_column": 103,
       "path": "src/otx/backend/native/models/segmentation/base.py",
       "code": -2,
@@ -13105,9 +12913,9 @@
       "severity": "error"
     },
     {
-      "line": 262,
+      "line": 257,
       "column": 9,
-      "stop_line": 262,
+      "stop_line": 257,
       "stop_column": 28,
       "path": "src/otx/backend/native/models/segmentation/base.py",
       "code": -2,
@@ -13117,9 +12925,9 @@
       "severity": "error"
     },
     {
-      "line": 292,
+      "line": 287,
       "column": 31,
-      "stop_line": 292,
+      "stop_line": 287,
       "stop_column": 40,
       "path": "src/otx/backend/native/models/segmentation/base.py",
       "code": -2,
@@ -13129,9 +12937,9 @@
       "severity": "error"
     },
     {
-      "line": 293,
+      "line": 288,
       "column": 31,
-      "stop_line": 293,
+      "stop_line": 288,
       "stop_column": 40,
       "path": "src/otx/backend/native/models/segmentation/base.py",
       "code": -2,
@@ -13141,9 +12949,9 @@
       "severity": "error"
     },
     {
-      "line": 96,
+      "line": 99,
       "column": 80,
-      "stop_line": 96,
+      "stop_line": 99,
       "stop_column": 90,
       "path": "src/otx/backend/native/models/segmentation/dino_v2_seg.py",
       "code": -2,
@@ -13153,9 +12961,9 @@
       "severity": "error"
     },
     {
-      "line": 97,
+      "line": 100,
       "column": 13,
-      "stop_line": 97,
+      "stop_line": 100,
       "stop_column": 37,
       "path": "src/otx/backend/native/models/segmentation/dino_v2_seg.py",
       "code": -2,
@@ -14221,21 +14029,9 @@
       "severity": "error"
     },
     {
-      "line": 34,
-      "column": 53,
-      "stop_line": 34,
-      "stop_column": 54,
-      "path": "src/otx/backend/native/tools/explain/explain_algo.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor | object` is not assignable to parameter `input` with type `Tensor` in function `torch.nn.functional.adaptive_avg_pool2d`",
-      "concise_description": "Argument `Tensor | object` is not assignable to parameter `input` with type `Tensor` in function `torch.nn.functional.adaptive_avg_pool2d`",
-      "severity": "error"
-    },
-    {
-      "line": 39,
+      "line": 38,
       "column": 52,
-      "stop_line": 39,
+      "stop_line": 38,
       "stop_column": 63,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14245,9 +14041,9 @@
       "severity": "error"
     },
     {
-      "line": 92,
+      "line": 91,
       "column": 27,
-      "stop_line": 92,
+      "stop_line": 91,
       "stop_column": 47,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14257,9 +14053,9 @@
       "severity": "error"
     },
     {
-      "line": 94,
+      "line": 93,
       "column": 31,
-      "stop_line": 94,
+      "stop_line": 93,
       "stop_column": 47,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14269,9 +14065,9 @@
       "severity": "error"
     },
     {
-      "line": 95,
+      "line": 94,
       "column": 36,
-      "stop_line": 95,
+      "stop_line": 94,
       "stop_column": 56,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14281,9 +14077,9 @@
       "severity": "error"
     },
     {
-      "line": 134,
+      "line": 133,
       "column": 27,
-      "stop_line": 134,
+      "stop_line": 133,
       "stop_column": 47,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14293,9 +14089,9 @@
       "severity": "error"
     },
     {
-      "line": 136,
+      "line": 135,
       "column": 37,
-      "stop_line": 136,
+      "stop_line": 135,
       "stop_column": 53,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14305,9 +14101,9 @@
       "severity": "error"
     },
     {
-      "line": 139,
+      "line": 138,
       "column": 75,
-      "stop_line": 139,
+      "stop_line": 138,
       "stop_column": 76,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14317,9 +14113,9 @@
       "severity": "error"
     },
     {
-      "line": 194,
+      "line": 193,
       "column": 9,
-      "stop_line": 194,
+      "stop_line": 193,
       "stop_column": 13,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14329,9 +14125,9 @@
       "severity": "error"
     },
     {
-      "line": 281,
+      "line": 280,
       "column": 9,
-      "stop_line": 281,
+      "stop_line": 280,
       "stop_column": 13,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14341,9 +14137,9 @@
       "severity": "error"
     },
     {
-      "line": 296,
+      "line": 295,
       "column": 51,
-      "stop_line": 296,
+      "stop_line": 295,
       "stop_column": 61,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14353,9 +14149,9 @@
       "severity": "error"
     },
     {
-      "line": 300,
+      "line": 299,
       "column": 62,
-      "stop_line": 300,
+      "stop_line": 299,
       "stop_column": 72,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14365,9 +14161,9 @@
       "severity": "error"
     },
     {
-      "line": 306,
+      "line": 305,
       "column": 17,
-      "stop_line": 306,
+      "stop_line": 305,
       "stop_column": 34,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14377,9 +14173,9 @@
       "severity": "error"
     },
     {
-      "line": 338,
+      "line": 337,
       "column": 9,
-      "stop_line": 338,
+      "stop_line": 337,
       "stop_column": 13,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14389,9 +14185,9 @@
       "severity": "error"
     },
     {
-      "line": 342,
+      "line": 341,
       "column": 15,
-      "stop_line": 342,
+      "stop_line": 341,
       "stop_column": 23,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14401,9 +14197,9 @@
       "severity": "error"
     },
     {
-      "line": 356,
+      "line": 355,
       "column": 16,
-      "stop_line": 356,
+      "stop_line": 355,
       "stop_column": 48,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -14413,9 +14209,9 @@
       "severity": "error"
     },
     {
-      "line": 363,
+      "line": 362,
       "column": 10,
-      "stop_line": 363,
+      "stop_line": 362,
       "stop_column": 18,
       "path": "src/otx/backend/native/tools/explain/explain_algo.py",
       "code": -2,
@@ -15121,27 +14917,15 @@
       "severity": "error"
     },
     {
-      "line": 430,
-      "column": 31,
-      "stop_line": 430,
-      "stop_column": 40,
-      "path": "src/otx/backend/openvino/models/base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Size` is not assignable to parameter `img_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "concise_description": "Argument `Size` is not assignable to parameter `img_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "severity": "error"
-    },
-    {
-      "line": 431,
-      "column": 31,
-      "stop_line": 431,
-      "stop_column": 40,
+      "line": 426,
+      "column": 27,
+      "stop_line": 426,
+      "stop_column": 80,
       "path": "src/otx/backend/openvino/models/base.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Size` is not assignable to parameter `ori_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "concise_description": "Argument `Size` is not assignable to parameter `ori_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])",
       "severity": "error"
     },
     {
@@ -15601,9 +15385,9 @@
       "severity": "error"
     },
     {
-      "line": 150,
+      "line": 149,
       "column": 18,
-      "stop_line": 150,
+      "stop_line": 149,
       "stop_column": 29,
       "path": "src/otx/cli/cli.py",
       "code": -2,
@@ -15649,9 +15433,9 @@
       "severity": "error"
     },
     {
-      "line": 381,
+      "line": 403,
       "column": 57,
-      "stop_line": 381,
+      "stop_line": 403,
       "stop_column": 66,
       "path": "src/otx/cli/utils/jsonargparse.py",
       "code": -2,
@@ -15661,9 +15445,9 @@
       "severity": "error"
     },
     {
-      "line": 411,
+      "line": 433,
       "column": 41,
-      "stop_line": 411,
+      "stop_line": 433,
       "stop_column": 53,
       "path": "src/otx/cli/utils/jsonargparse.py",
       "code": -2,
@@ -15685,45 +15469,33 @@
       "severity": "error"
     },
     {
-      "line": 182,
+      "line": 214,
       "column": 21,
-      "stop_line": 182,
-      "stop_column": 47,
+      "stop_line": 214,
+      "stop_column": 61,
       "path": "src/otx/data/dataset/base.py",
       "code": -2,
       "name": "bad-assignment",
-      "description": "`signedinteger[_64Bit]` is not assignable to variable `index` with type `int`",
-      "concise_description": "`signedinteger[_64Bit]` is not assignable to variable `index` with type `int`",
-      "severity": "error"
-    },
-    {
-      "line": 75,
-      "column": 18,
-      "stop_line": 75,
-      "stop_column": 72,
-      "path": "src/otx/data/dataset/classification.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Categories` has no attribute `labels`\nObject of class `NoneType` has no attribute `labels`",
-      "concise_description": "Object of class `Categories` has no attribute `labels`\nObject of class `NoneType` has no attribute `labels`",
+      "description": "`bool | float | int` is not assignable to variable `index` with type `int`",
+      "concise_description": "`bool | float | int` is not assignable to variable `index` with type `int`",
       "severity": "error"
     },
     {
-      "line": 157,
-      "column": 18,
-      "stop_line": 157,
-      "stop_column": 72,
+      "line": 159,
+      "column": 9,
+      "stop_line": 159,
+      "stop_column": 19,
       "path": "src/otx/data/dataset/classification.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `Categories` has no attribute `labels`\nObject of class `NoneType` has no attribute `labels`",
-      "concise_description": "Object of class `Categories` has no attribute `labels`\nObject of class `NoneType` has no attribute `labels`",
+      "description": "Object of class `OTXSample` has no attribute `label`",
+      "concise_description": "Object of class `OTXSample` has no attribute `label`",
       "severity": "error"
     },
     {
-      "line": 169,
+      "line": 160,
       "column": 16,
-      "stop_line": 169,
+      "stop_line": 160,
       "stop_column": 44,
       "path": "src/otx/data/dataset/classification.py",
       "code": -2,
@@ -15733,9 +15505,9 @@
       "severity": "error"
     },
     {
-      "line": 171,
+      "line": 162,
       "column": 42,
-      "stop_line": 171,
+      "stop_line": 162,
       "stop_column": 54,
       "path": "src/otx/data/dataset/classification.py",
       "code": -2,
@@ -15745,9 +15517,9 @@
       "severity": "error"
     },
     {
-      "line": 171,
+      "line": 162,
       "column": 86,
-      "stop_line": 171,
+      "stop_line": 162,
       "stop_column": 98,
       "path": "src/otx/data/dataset/classification.py",
       "code": -2,
@@ -15757,21 +15529,21 @@
       "severity": "error"
     },
     {
-      "line": 275,
-      "column": 59,
-      "stop_line": 275,
-      "stop_column": 77,
+      "line": 274,
+      "column": 9,
+      "stop_line": 274,
+      "stop_column": 19,
       "path": "src/otx/data/dataset/classification.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Categories | None` is not assignable to parameter `dm_label_categories` with type `HierarchicalLabelCategories` in function `otx.types.label.HLabelInfo.from_dm_label_groups`",
-      "concise_description": "Argument `Categories | None` is not assignable to parameter `dm_label_categories` with type `HierarchicalLabelCategories` in function `otx.types.label.HLabelInfo.from_dm_label_groups`",
+      "name": "missing-attribute",
+      "description": "Object of class `OTXSample` has no attribute `label`",
+      "concise_description": "Object of class `OTXSample` has no attribute `label`",
       "severity": "error"
     },
     {
-      "line": 288,
+      "line": 275,
       "column": 16,
-      "stop_line": 288,
+      "stop_line": 275,
       "stop_column": 44,
       "path": "src/otx/data/dataset/classification.py",
       "code": -2,
@@ -15781,69 +15553,45 @@
       "severity": "error"
     },
     {
-      "line": 336,
-      "column": 20,
-      "stop_line": 336,
-      "stop_column": 44,
-      "path": "src/otx/data/dataset/classification.py",
+      "line": 21,
+      "column": 7,
+      "stop_line": 21,
+      "stop_column": 26,
+      "path": "src/otx/data/dataset/detection.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Categories` has no attribute `items`\nObject of class `NoneType` has no attribute `items`",
-      "concise_description": "Object of class `Categories` has no attribute `items`\nObject of class `NoneType` has no attribute `items`",
+      "name": "inconsistent-inheritance",
+      "description": "Field `transforms` is declared `((...) -> Unknown) | CPUAugmentationPipeline | Compose | dict[str, ((...) -> Unknown) | Compose | list[(...) -> Unknown]] | list[(...) -> Unknown] | None` in ancestor `class OTXDataset: ...\n`, which is not assignable to the type `CPUAugmentationPipeline` implied by multiple inheritance",
+      "concise_description": "Field `transforms` is declared `((...) -> Unknown) | CPUAugmentationPipeline | Compose | dict[str, ((...) -> Unknown) | Compose | list[(...) -> Unknown]] | list[(...) -> Unknown] | None` in ancestor `class OTXDataset: ...\n`, which is not assignable to the type `CPUAugmentationPipeline` implied by multiple inheritance",
       "severity": "error"
     },
     {
-      "line": 338,
-      "column": 52,
-      "stop_line": 338,
-      "stop_column": 76,
-      "path": "src/otx/data/dataset/classification.py",
+      "line": 60,
+      "column": 18,
+      "stop_line": 60,
+      "stop_column": 72,
+      "path": "src/otx/data/dataset/detection.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `Categories` has no attribute `items`\nObject of class `NoneType` has no attribute `items`",
-      "concise_description": "Object of class `Categories` has no attribute `items`\nObject of class `NoneType` has no attribute `items`",
+      "description": "Object of class `Categories` has no attribute `labels`\nObject of class `NoneType` has no attribute `labels`",
+      "concise_description": "Object of class `Categories` has no attribute `labels`\nObject of class `NoneType` has no attribute `labels`",
       "severity": "error"
     },
     {
-      "line": 340,
-      "column": 28,
-      "stop_line": 340,
-      "stop_column": 52,
-      "path": "src/otx/data/dataset/classification.py",
+      "line": 86,
+      "column": 9,
+      "stop_line": 86,
+      "stop_column": 26,
+      "path": "src/otx/data/dataset/detection.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Categories` has no attribute `items`\nObject of class `NoneType` has no attribute `items`",
-      "concise_description": "Object of class `Categories` has no attribute `items`\nObject of class `NoneType` has no attribute `items`",
+      "name": "bad-override",
+      "description": "Class member `OTXDetectionDataset._apply_transforms` overrides parent class `OTXDataset` in an inconsistent manner\n  `OTXDetectionDataset._apply_transforms` has type `BoundMethod[OTXDetectionDataset, (self: OTXDetectionDataset, entity: DetectionSample) -> DetectionSample | None]`, which is not assignable to `BoundMethod[OTXDetectionDataset, (self: OTXDetectionDataset, entity: OTXSample) -> OTXSample | None]`, the type of `OTXDataset._apply_transforms`",
+      "concise_description": "Class member `OTXDetectionDataset._apply_transforms` overrides parent class `OTXDataset` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 65,
-      "column": 23,
-      "stop_line": 65,
-      "stop_column": 77,
-      "path": "src/otx/data/dataset/detection.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Categories` has no attribute `labels`\nObject of class `NoneType` has no attribute `labels`",
-      "concise_description": "Object of class `Categories` has no attribute `labels`\nObject of class `NoneType` has no attribute `labels`",
-      "severity": "error"
-    },
-    {
-      "line": 91,
-      "column": 9,
-      "stop_line": 91,
-      "stop_column": 26,
-      "path": "src/otx/data/dataset/detection.py",
-      "code": -2,
-      "name": "bad-override",
-      "description": "Class member `OTXDetectionDataset._apply_transforms` overrides parent class `OTXDataset` in an inconsistent manner\n  `OTXDetectionDataset._apply_transforms` has type `BoundMethod[OTXDetectionDataset, (self: OTXDetectionDataset, entity: DetectionSample) -> DetectionSample | None]`, which is not assignable to `BoundMethod[OTXDetectionDataset, (self: OTXDetectionDataset, entity: OTXSample) -> OTXSample | None]`, the type of `OTXDataset._apply_transforms`",
-      "concise_description": "Class member `OTXDetectionDataset._apply_transforms` overrides parent class `OTXDataset` in an inconsistent manner",
-      "severity": "error"
-    },
-    {
-      "line": 94,
+      "line": 89,
       "column": 16,
-      "stop_line": 94,
+      "stop_line": 89,
       "stop_column": 49,
       "path": "src/otx/data/dataset/detection.py",
       "code": -2,
@@ -15853,9 +15601,9 @@
       "severity": "error"
     },
     {
-      "line": 56,
+      "line": 52,
       "column": 23,
-      "stop_line": 56,
+      "stop_line": 52,
       "stop_column": 77,
       "path": "src/otx/data/dataset/instance_segmentation.py",
       "code": -2,
@@ -15865,10 +15613,10 @@
       "severity": "error"
     },
     {
-      "line": 73,
+      "line": 63,
       "column": 18,
-      "stop_line": 73,
-      "stop_column": 72,
+      "stop_line": 63,
+      "stop_column": 76,
       "path": "src/otx/data/dataset/keypoint_detection.py",
       "code": -2,
       "name": "missing-attribute",
@@ -15877,9 +15625,33 @@
       "severity": "error"
     },
     {
-      "line": 69,
+      "line": 72,
+      "column": 9,
+      "stop_line": 72,
+      "stop_column": 23,
+      "path": "src/otx/data/dataset/keypoint_detection.py",
+      "code": -2,
+      "name": "missing-attribute",
+      "description": "Object of class `OTXSample` has no attribute `keypoints`",
+      "concise_description": "Object of class `OTXSample` has no attribute `keypoints`",
+      "severity": "error"
+    },
+    {
+      "line": 72,
+      "column": 44,
+      "stop_line": 72,
+      "stop_column": 58,
+      "path": "src/otx/data/dataset/keypoint_detection.py",
+      "code": -2,
+      "name": "missing-attribute",
+      "description": "Object of class `OTXSample` has no attribute `keypoints`",
+      "concise_description": "Object of class `OTXSample` has no attribute `keypoints`",
+      "severity": "error"
+    },
+    {
+      "line": 66,
       "column": 23,
-      "stop_line": 69,
+      "stop_line": 66,
       "stop_column": 77,
       "path": "src/otx/data/dataset/segmentation.py",
       "code": -2,
@@ -15925,9 +15697,9 @@
       "severity": "error"
     },
     {
-      "line": 124,
+      "line": 96,
       "column": 9,
-      "stop_line": 124,
+      "stop_line": 96,
       "stop_column": 21,
       "path": "src/otx/data/entity/base.py",
       "code": -2,
@@ -15937,9 +15709,9 @@
       "severity": "error"
     },
     {
-      "line": 167,
+      "line": 141,
       "column": 16,
-      "stop_line": 167,
+      "stop_line": 141,
       "stop_column": 22,
       "path": "src/otx/data/entity/base.py",
       "code": -2,
@@ -15949,227 +15721,167 @@
       "severity": "error"
     },
     {
-      "line": 291,
-      "column": 9,
-      "stop_line": 291,
-      "stop_column": 21,
-      "path": "src/otx/data/entity/base.py",
+      "line": 82,
+      "column": 34,
+      "stop_line": 86,
+      "stop_column": 10,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
-      "name": "bad-override",
-      "description": "Class member `Points._wrap_output` overrides parent class `TVTensor` in an inconsistent manner\n  `Points._wrap_output` has type `BoundMethod[type[Points], (cls: type[Points], output: Tensor, args: tuple[()] = ..., kwargs: Mapping[str, Any] | None = None) -> Points]`, which is not assignable to `BoundMethod[type[Points], (cls: type[Points], output: Tensor, args: Sequence[Any] = ..., kwargs: Mapping[str, Any] | None = None) -> Tensor]`, the type of `TVTensor._wrap_output`",
-      "concise_description": "Class member `Points._wrap_output` overrides parent class `TVTensor` in an inconsistent manner",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
       "severity": "error"
     },
     {
-      "line": 321,
-      "column": 14,
-      "stop_line": 321,
-      "stop_column": 18,
-      "path": "src/otx/data/entity/base.py",
+      "line": 101,
+      "column": 34,
+      "stop_line": 105,
+      "stop_column": 10,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[int] | tuple[int, int]` is not assignable to parameter `size` with type `list[int] | None` in function `torchvision.transforms.v2.functional._geometry._compute_resized_output_size`",
-      "concise_description": "Argument `list[int] | tuple[int, int]` is not assignable to parameter `size` with type `list[int] | None` in function `torchvision.transforms.v2.functional._geometry._compute_resized_output_size`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
       "severity": "error"
     },
     {
-      "line": 350,
-      "column": 12,
-      "stop_line": 350,
-      "stop_column": 71,
-      "path": "src/otx/data/entity/base.py",
+      "line": 120,
+      "column": 34,
+      "stop_line": 124,
+      "stop_column": 10,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `((...) -> Any) | ((ParamSpec(@_)) -> @_) | Unknown` is not assignable to declared return type `Points`",
-      "concise_description": "Returned type `((...) -> Any) | ((ParamSpec(@_)) -> @_) | Unknown` is not assignable to declared return type `Points`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
       "severity": "error"
     },
     {
-      "line": 350,
-      "column": 28,
-      "stop_line": 350,
-      "stop_column": 34,
-      "path": "src/otx/data/entity/base.py",
+      "line": 144,
+      "column": 51,
+      "stop_line": 149,
+      "stop_column": 14,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[int, int], dtype=dtype)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[int, int], dtype=dtype)",
       "severity": "error"
     },
     {
-      "line": 350,
-      "column": 28,
-      "stop_line": 350,
-      "stop_column": 34,
-      "path": "src/otx/data/entity/base.py",
+      "line": 151,
+      "column": 34,
+      "stop_line": 155,
+      "stop_column": 10,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
       "severity": "error"
     },
     {
-      "line": 350,
-      "column": 36,
-      "stop_line": 350,
-      "stop_column": 40,
-      "path": "src/otx/data/entity/base.py",
+      "line": 170,
+      "column": 34,
+      "stop_line": 174,
+      "stop_column": 10,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
       "severity": "error"
     },
     {
-      "line": 350,
-      "column": 36,
-      "stop_line": 350,
-      "stop_column": 40,
-      "path": "src/otx/data/entity/base.py",
+      "line": 197,
+      "column": 51,
+      "stop_line": 202,
+      "stop_column": 14,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[int, int], dtype=dtype)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[int, int], dtype=dtype)",
       "severity": "error"
     },
     {
-      "line": 350,
-      "column": 47,
-      "stop_line": 350,
-      "stop_column": 58,
-      "path": "src/otx/data/entity/base.py",
+      "line": 204,
+      "column": 34,
+      "stop_line": 208,
+      "stop_column": 10,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `canvas_size` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Unexpected keyword argument `canvas_size` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
       "severity": "error"
     },
     {
-      "line": 350,
-      "column": 47,
-      "stop_line": 350,
-      "stop_column": 58,
-      "path": "src/otx/data/entity/base.py",
+      "line": 225,
+      "column": 34,
+      "stop_line": 229,
+      "stop_column": 10,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `canvas_size` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Unexpected keyword argument `canvas_size` in function `torch._dynamo.decorators.wrap`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
       "severity": "error"
     },
     {
-      "line": 364,
-      "column": 63,
-      "stop_line": 364,
-      "stop_column": 70,
-      "path": "src/otx/data/entity/base.py",
+      "line": 267,
+      "column": 29,
+      "stop_line": 267,
+      "stop_column": 40,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int] | tuple[int, ...]` is not assignable to parameter `padding` with type `int | list[int]` in function `torchvision.transforms.v2.functional._geometry._parse_pad_padding`",
-      "concise_description": "Argument `list[int] | tuple[int, ...]` is not assignable to parameter `padding` with type `int | list[int]` in function `torchvision.transforms.v2.functional._geometry._parse_pad_padding`",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `label_batch` with type `list[Tensor | None]` in function `otx.data.entity.validation.validate_labels`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `label_batch` with type `list[Tensor | None]` in function `otx.data.entity.validation.validate_labels`",
       "severity": "error"
     },
     {
-      "line": 390,
-      "column": 12,
-      "stop_line": 390,
-      "stop_column": 71,
-      "path": "src/otx/data/entity/base.py",
+      "line": 269,
+      "column": 29,
+      "stop_line": 269,
+      "stop_column": 40,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `((...) -> Any) | ((ParamSpec(@_)) -> @_) | Unknown` is not assignable to declared return type `Points`",
-      "concise_description": "Returned type `((...) -> Any) | ((ParamSpec(@_)) -> @_) | Unknown` is not assignable to declared return type `Points`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[BoundingBoxes]` is not assignable to parameter `boxes_batch` with type `list[BoundingBoxes | None]` in function `otx.data.entity.validation.validate_bboxes`",
+      "concise_description": "Argument `list[BoundingBoxes]` is not assignable to parameter `boxes_batch` with type `list[BoundingBoxes | None]` in function `otx.data.entity.validation.validate_bboxes`",
       "severity": "error"
     },
     {
-      "line": 390,
-      "column": 28,
-      "stop_line": 390,
-      "stop_column": 34,
-      "path": "src/otx/data/entity/base.py",
+      "line": 271,
+      "column": 32,
+      "stop_line": 271,
+      "stop_column": 46,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `keypoints_batch` with type `list[Tensor | None]` in function `otx.data.entity.validation.validate_keypoints`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `keypoints_batch` with type `list[Tensor | None]` in function `otx.data.entity.validation.validate_keypoints`",
       "severity": "error"
     },
     {
-      "line": 390,
+      "line": 273,
       "column": 28,
-      "stop_line": 390,
-      "stop_column": 34,
-      "path": "src/otx/data/entity/base.py",
+      "stop_line": 273,
+      "stop_column": 38,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
+      "description": "Argument `list[Mask]` is not assignable to parameter `masks_batch` with type `list[Mask | None]` in function `otx.data.entity.validation.validate_masks`",
+      "concise_description": "Argument `list[Mask]` is not assignable to parameter `masks_batch` with type `list[Mask | None]` in function `otx.data.entity.validation.validate_masks`",
       "severity": "error"
     },
     {
-      "line": 390,
-      "column": 36,
-      "stop_line": 390,
-      "stop_column": 40,
-      "path": "src/otx/data/entity/base.py",
-      "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "severity": "error"
-    },
-    {
-      "line": 390,
-      "column": 36,
-      "stop_line": 390,
-      "stop_column": 40,
-      "path": "src/otx/data/entity/base.py",
-      "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
-      "severity": "error"
-    },
-    {
-      "line": 390,
-      "column": 47,
-      "stop_line": 390,
-      "stop_column": 58,
-      "path": "src/otx/data/entity/base.py",
-      "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `canvas_size` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Unexpected keyword argument `canvas_size` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "severity": "error"
-    },
-    {
-      "line": 390,
-      "column": 47,
-      "stop_line": 390,
-      "stop_column": 58,
-      "path": "src/otx/data/entity/base.py",
-      "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `canvas_size` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Unexpected keyword argument `canvas_size` in function `torch._dynamo.decorators.wrap`",
-      "severity": "error"
-    },
-    {
-      "line": 420,
-      "column": 16,
-      "stop_line": 420,
-      "stop_column": 50,
-      "path": "src/otx/data/entity/base.py",
-      "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `((...) -> Any) | ((ParamSpec(@_)) -> @_) | Unknown` is not assignable to declared return type `Tensor`",
-      "concise_description": "Returned type `((...) -> Any) | ((ParamSpec(@_)) -> @_) | Unknown` is not assignable to declared return type `Tensor`",
-      "severity": "error"
-    },
-    {
-      "line": 420,
-      "column": 32,
-      "stop_line": 420,
-      "stop_column": 38,
-      "path": "src/otx/data/entity/base.py",
+      "line": 290,
+      "column": 40,
+      "stop_line": 290,
+      "stop_column": 54,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
       "name": "bad-argument-type",
       "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
@@ -16177,11 +15889,11 @@
       "severity": "error"
     },
     {
-      "line": 420,
-      "column": 32,
-      "stop_line": 420,
-      "stop_column": 38,
-      "path": "src/otx/data/entity/base.py",
+      "line": 290,
+      "column": 40,
+      "stop_line": 290,
+      "stop_column": 54,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
       "name": "bad-argument-type",
       "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
@@ -16189,11 +15901,11 @@
       "severity": "error"
     },
     {
-      "line": 420,
-      "column": 40,
-      "stop_line": 420,
-      "stop_column": 44,
-      "path": "src/otx/data/entity/base.py",
+      "line": 290,
+      "column": 56,
+      "stop_line": 290,
+      "stop_column": 60,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
       "name": "unexpected-keyword",
       "description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
@@ -16201,11 +15913,11 @@
       "severity": "error"
     },
     {
-      "line": 420,
-      "column": 40,
-      "stop_line": 420,
-      "stop_column": 44,
-      "path": "src/otx/data/entity/base.py",
+      "line": 290,
+      "column": 56,
+      "stop_line": 290,
+      "stop_column": 60,
+      "path": "src/otx/data/entity/sample.py",
       "code": -2,
       "name": "unexpected-keyword",
       "description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
@@ -16213,6455 +15925,3239 @@
       "severity": "error"
     },
     {
-      "line": 77,
-      "column": 13,
-      "stop_line": 77,
-      "stop_column": 26,
+      "line": 340,
+      "column": 29,
+      "stop_line": 340,
+      "stop_column": 40,
       "path": "src/otx/data/entity/sample.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Sample` has no attribute `_img_info`",
-      "concise_description": "Object of class `Sample` has no attribute `_img_info`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `scores_batch` with type `list[Tensor | None]` in function `otx.data.entity.validation.validate_scores`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `scores_batch` with type `list[Tensor | None]` in function `otx.data.entity.validation.validate_scores`",
       "severity": "error"
     },
     {
-      "line": 83,
-      "column": 22,
-      "stop_line": 83,
-      "stop_column": 34,
+      "line": 342,
+      "column": 38,
+      "stop_line": 342,
+      "stop_column": 57,
       "path": "src/otx/data/entity/sample.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `(values: list[Any], context: list[str]) -> object` is not assignable to parameter `unflatten_fn` with type `(Iterable[Any], Any) -> Any` in function `torch.utils._pytree.register_pytree_node`",
-      "concise_description": "Argument `(values: list[Any], context: list[str]) -> object` is not assignable to parameter `unflatten_fn` with type `(Iterable[Any], Any) -> Any` in function `torch.utils._pytree.register_pytree_node`",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `feature_vector_batch` with type `list[Tensor | ndarray[Unknown, Unknown] | None]` in function `otx.data.entity.validation.validate_feature_vectors`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `feature_vector_batch` with type `list[Tensor | ndarray[Unknown, Unknown] | None]` in function `otx.data.entity.validation.validate_feature_vectors`",
       "severity": "error"
     },
     {
-      "line": 121,
-      "column": 34,
-      "stop_line": 125,
-      "stop_column": 10,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 109,
+      "column": 17,
+      "stop_line": 109,
+      "stop_column": 47,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Image]` is not assignable to parameter `tensor_list` with type `list[Tensor]` in function `otx.data.entity.utils.stack_batch`",
+      "concise_description": "Argument `list[Image]` is not assignable to parameter `tensor_list` with type `list[Tensor]` in function `otx.data.entity.utils.stack_batch`",
       "severity": "error"
     },
     {
-      "line": 139,
-      "column": 34,
-      "stop_line": 143,
-      "stop_column": 10,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 141,
+      "column": 25,
+      "stop_line": 141,
+      "stop_column": 113,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
+      "name": "bad-argument-type",
+      "description": "Argument `list[list[Image | Tensor]]` is not assignable to parameter `batch_tiles` with type `list[list[Image]]` in function `TileBatchDetDataEntity.__init__`",
+      "concise_description": "Argument `list[list[Image | Tensor]]` is not assignable to parameter `batch_tiles` with type `list[list[Image]]` in function `TileBatchDetDataEntity.__init__`",
       "severity": "error"
     },
     {
-      "line": 157,
-      "column": 34,
-      "stop_line": 161,
-      "stop_column": 10,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 146,
+      "column": 18,
+      "stop_line": 146,
+      "stop_column": 29,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
+      "name": "missing-attribute",
+      "description": "Object of class `OTXSample` has no attribute `tile`",
+      "concise_description": "Object of class `OTXSample` has no attribute `tile`",
       "severity": "error"
     },
     {
-      "line": 180,
-      "column": 51,
-      "stop_line": 185,
-      "stop_column": 14,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 188,
+      "column": 9,
+      "stop_line": 188,
+      "stop_column": 15,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ndarray[Unknown, Unknown], format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[int, int], dtype=dtype)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ndarray[Unknown, Unknown], format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[int, int], dtype=dtype)",
+      "name": "bad-override",
+      "description": "Class member `TileBatchInstSegDataEntity.unbind` overrides parent class `OTXTileBatchDataEntity` in an inconsistent manner\n  `TileBatchInstSegDataEntity.unbind` has type `BoundMethod[TileBatchInstSegDataEntity, (self: TileBatchInstSegDataEntity) -> list[tuple[list[dict[str, int | str]], OTXSampleBatch]]]`, which is not assignable to `BoundMethod[TileBatchInstSegDataEntity, (self: TileBatchInstSegDataEntity) -> list[tuple[list[TileInfo], OTXSampleBatch]]]`, the type of `OTXTileBatchDataEntity.unbind`",
+      "concise_description": "Class member `TileBatchInstSegDataEntity.unbind` overrides parent class `OTXTileBatchDataEntity` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 187,
-      "column": 34,
-      "stop_line": 191,
-      "stop_column": 10,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 204,
+      "column": 20,
+      "stop_line": 204,
+      "stop_column": 41,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
       "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
+      "description": "No matching overload found for function `list.__init__` called with arguments: (list[tuple[list[TileInfo], OTXSampleBatch]])\n  Possible overloads:\n  () -> None\n  (iterable: Iterable[tuple[list[dict[str, int | str]], OTXSampleBatch]], /) -> None [closest match]",
+      "concise_description": "No matching overload found for function `list.__init__` called with arguments: (list[tuple[list[TileInfo], OTXSampleBatch]])",
       "severity": "error"
     },
     {
-      "line": 205,
-      "column": 34,
-      "stop_line": 209,
-      "stop_column": 10,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 224,
+      "column": 25,
+      "stop_line": 224,
+      "stop_column": 113,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
+      "name": "bad-argument-type",
+      "description": "Argument `list[list[Image | Tensor]]` is not assignable to parameter `batch_tiles` with type `list[list[Image]]` in function `TileBatchInstSegDataEntity.__init__`",
+      "concise_description": "Argument `list[list[Image | Tensor]]` is not assignable to parameter `batch_tiles` with type `list[list[Image]]` in function `TileBatchInstSegDataEntity.__init__`",
       "severity": "error"
     },
     {
-      "line": 228,
-      "column": 51,
-      "stop_line": 233,
-      "stop_column": 14,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 230,
+      "column": 18,
+      "stop_line": 230,
+      "stop_column": 29,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ndarray[Unknown, Unknown], format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[int, int], dtype=dtype)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ndarray[Unknown, Unknown], format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[int, int], dtype=dtype)",
+      "name": "missing-attribute",
+      "description": "Object of class `OTXSample` has no attribute `tile`",
+      "concise_description": "Object of class `OTXSample` has no attribute `tile`",
       "severity": "error"
     },
     {
-      "line": 235,
-      "column": 34,
-      "stop_line": 239,
-      "stop_column": 10,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 265,
+      "column": 9,
+      "stop_line": 265,
+      "stop_column": 15,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
+      "name": "bad-override",
+      "description": "Class member `TileBatchSegDataEntity.unbind` overrides parent class `OTXTileBatchDataEntity` in an inconsistent manner\n  `TileBatchSegDataEntity.unbind` has type `BoundMethod[TileBatchSegDataEntity, (self: TileBatchSegDataEntity) -> list[tuple[list[dict[str, int | str]], OTXSampleBatch]]]`, which is not assignable to `BoundMethod[TileBatchSegDataEntity, (self: TileBatchSegDataEntity) -> list[tuple[list[TileInfo], OTXSampleBatch]]]`, the type of `OTXTileBatchDataEntity.unbind`",
+      "concise_description": "Class member `TileBatchSegDataEntity.unbind` overrides parent class `OTXTileBatchDataEntity` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 255,
-      "column": 34,
-      "stop_line": 259,
-      "stop_column": 10,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 275,
+      "column": 28,
+      "stop_line": 275,
+      "stop_column": 103,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[int, int], ori_shape=tuple[int, int])",
+      "name": "bad-argument-type",
+      "description": "Argument `((...) -> Any) | ((ParamSpec(@_)) -> @_) | Unknown` is not assignable to parameter `images` with type `Image | Tensor | list[Image] | list[Tensor]` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
+      "concise_description": "Argument `((...) -> Any) | ((ParamSpec(@_)) -> @_) | Unknown` is not assignable to parameter `images` with type `Image | Tensor | list[Image] | list[Tensor]` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 273,
-      "column": 30,
-      "stop_line": 273,
-      "stop_column": 66,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 275,
+      "column": 44,
+      "stop_line": 275,
+      "stop_column": 87,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[Image | Tensor | ndarray[Unknown, Unknown] | Any]` is not assignable to parameter `tensors` with type `list[Tensor] | tuple[Tensor, ...] | None` in function `torch._C._VariableFunctions.stack`",
-      "concise_description": "Argument `list[Image | Tensor | ndarray[Unknown, Unknown] | Any]` is not assignable to parameter `tensors` with type `list[Tensor] | tuple[Tensor, ...] | None` in function `torch._C._VariableFunctions.stack`",
+      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
       "severity": "error"
     },
     {
-      "line": 279,
-      "column": 16,
-      "stop_line": 279,
-      "stop_column": 22,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 275,
+      "column": 44,
+      "stop_line": 275,
+      "stop_column": 87,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Tensor | list[Image | Tensor | ndarray[Unknown, Unknown] | Any]` is not assignable to parameter `images` with type `Tensor | list[Tensor]` in function `OTXSampleBatch.__init__`",
-      "concise_description": "Argument `Tensor | list[Image | Tensor | ndarray[Unknown, Unknown] | Any]` is not assignable to parameter `images` with type `Tensor | list[Tensor]` in function `OTXSampleBatch.__init__`",
+      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
+      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
       "severity": "error"
     },
     {
-      "line": 280,
-      "column": 17,
-      "stop_line": 280,
-      "stop_column": 29,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 275,
+      "column": 56,
+      "stop_line": 275,
+      "stop_column": 86,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `label`",
-      "concise_description": "Object of class `OTXSample` has no attribute `label`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Image]` is not assignable to parameter `tensors` with type `list[Tensor] | tuple[Tensor, ...] | None` in function `torch._C._VariableFunctions.stack`",
+      "concise_description": "Argument `list[Image]` is not assignable to parameter `tensors` with type `list[Tensor] | tuple[Tensor, ...] | None` in function `torch._C._VariableFunctions.stack`",
       "severity": "error"
     },
     {
-      "line": 281,
-      "column": 17,
-      "stop_line": 281,
-      "stop_column": 30,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 275,
+      "column": 89,
+      "stop_line": 275,
+      "stop_column": 93,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `bboxes`",
-      "concise_description": "Object of class `OTXSample` has no attribute `bboxes`",
+      "name": "unexpected-keyword",
+      "description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
       "severity": "error"
     },
     {
-      "line": 282,
-      "column": 20,
-      "stop_line": 282,
-      "stop_column": 36,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 275,
+      "column": 89,
+      "stop_line": 275,
+      "stop_column": 93,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `keypoints`",
-      "concise_description": "Object of class `OTXSample` has no attribute `keypoints`",
+      "name": "unexpected-keyword",
+      "description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
+      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
       "severity": "error"
     },
     {
-      "line": 283,
-      "column": 16,
-      "stop_line": 283,
-      "stop_column": 28,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 277,
+      "column": 27,
+      "stop_line": 277,
+      "stop_column": 83,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `masks`",
-      "concise_description": "Object of class `OTXSample` has no attribute `masks`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `masks` with type `list[Mask] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `masks` with type `list[Mask] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 323,
-      "column": 29,
-      "stop_line": 323,
-      "stop_column": 40,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 282,
+      "column": 20,
+      "stop_line": 282,
+      "stop_column": 41,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `label_batch` with type `list[Tensor | None]` in function `otx.data.entity.validation.validate_labels`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `label_batch` with type `list[Tensor | None]` in function `otx.data.entity.validation.validate_labels`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `list.__init__` called with arguments: (list[tuple[list[TileInfo], OTXSampleBatch]])\n  Possible overloads:\n  () -> None\n  (iterable: Iterable[tuple[list[dict[str, int | str]], OTXSampleBatch]], /) -> None [closest match]",
+      "concise_description": "No matching overload found for function `list.__init__` called with arguments: (list[tuple[list[TileInfo], OTXSampleBatch]])",
       "severity": "error"
     },
     {
-      "line": 325,
-      "column": 29,
-      "stop_line": 325,
-      "stop_column": 40,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 302,
+      "column": 25,
+      "stop_line": 302,
+      "stop_column": 113,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[BoundingBoxes]` is not assignable to parameter `boxes_batch` with type `list[BoundingBoxes | None]` in function `otx.data.entity.validation.validate_bboxes`",
-      "concise_description": "Argument `list[BoundingBoxes]` is not assignable to parameter `boxes_batch` with type `list[BoundingBoxes | None]` in function `otx.data.entity.validation.validate_bboxes`",
+      "description": "Argument `list[list[Image | Tensor]]` is not assignable to parameter `batch_tiles` with type `list[list[Image]]` in function `TileBatchSegDataEntity.__init__`",
+      "concise_description": "Argument `list[list[Image | Tensor]]` is not assignable to parameter `batch_tiles` with type `list[list[Image]]` in function `TileBatchSegDataEntity.__init__`",
       "severity": "error"
     },
     {
-      "line": 327,
-      "column": 32,
-      "stop_line": 327,
-      "stop_column": 46,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 307,
+      "column": 18,
+      "stop_line": 307,
+      "stop_column": 29,
+      "path": "src/otx/data/entity/tile.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `keypoints_batch` with type `list[Tensor | None]` in function `otx.data.entity.validation.validate_keypoints`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `keypoints_batch` with type `list[Tensor | None]` in function `otx.data.entity.validation.validate_keypoints`",
+      "name": "missing-attribute",
+      "description": "Object of class `OTXSample` has no attribute `tile`",
+      "concise_description": "Object of class `OTXSample` has no attribute `tile`",
       "severity": "error"
     },
     {
-      "line": 329,
-      "column": 28,
-      "stop_line": 329,
-      "stop_column": 38,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 271,
+      "column": 26,
+      "stop_line": 271,
+      "stop_column": 46,
+      "path": "src/otx/data/entity/utils.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Mask]` is not assignable to parameter `masks_batch` with type `list[Mask | None]` in function `otx.data.entity.validation.validate_masks`",
-      "concise_description": "Argument `list[Mask]` is not assignable to parameter `masks_batch` with type `list[Mask | None]` in function `otx.data.entity.validation.validate_masks`",
+      "name": "bad-assignment",
+      "description": "`tuple[int, ...]` is not assignable to attribute `img_shape` with type `tuple[int, int]`",
+      "concise_description": "`tuple[int, ...]` is not assignable to attribute `img_shape` with type `tuple[int, int]`",
       "severity": "error"
     },
     {
-      "line": 346,
-      "column": 40,
-      "stop_line": 346,
-      "stop_column": 54,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 274,
+      "column": 24,
+      "stop_line": 274,
+      "stop_column": 106,
+      "path": "src/otx/data/entity/utils.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "bad-assignment",
+      "description": "`tuple[Tensor, Tensor, Tensor, Tensor]` is not assignable to attribute `padding` with type `tuple[int, int, int, int]`",
+      "concise_description": "`tuple[Tensor, Tensor, Tensor, Tensor]` is not assignable to attribute `padding` with type `tuple[int, int, int, int]`",
       "severity": "error"
     },
     {
-      "line": 346,
-      "column": 40,
-      "stop_line": 346,
-      "stop_column": 54,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 296,
+      "column": 31,
+      "stop_line": 296,
+      "stop_column": 59,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
+      "name": "bad-assignment",
+      "description": "`list[int] | tuple[int, ...]` is not assignable to attribute `input_size` with type `tuple[int, int] | None`",
+      "concise_description": "`list[int] | tuple[int, ...]` is not assignable to attribute `input_size` with type `tuple[int, int] | None`",
       "severity": "error"
     },
     {
-      "line": 346,
-      "column": 56,
-      "stop_line": 346,
-      "stop_column": 60,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 386,
+      "column": 33,
+      "stop_line": 386,
+      "stop_column": 55,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "missing-attribute",
+      "description": "Object of class `NoneType` has no attribute `get`\nObject of class `list` has no attribute `get`\nObject of class `str` has no attribute `get`",
+      "concise_description": "Object of class `NoneType` has no attribute `get`\nObject of class `list` has no attribute `get`\nObject of class `str` has no attribute `get`",
       "severity": "error"
     },
     {
-      "line": 346,
-      "column": 56,
-      "stop_line": 346,
-      "stop_column": 60,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 388,
+      "column": 17,
+      "stop_line": 388,
+      "stop_column": 49,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
+      "name": "unsupported-operation",
+      "description": "Cannot set item in `list[Any]`\n  No matching overload found for function `list.__setitem__` called with arguments: (Literal['input_size'], tuple[int, int])\n  Possible overloads:\n  (key: SupportsIndex, value: Any, /) -> None [closest match]\n  (key: slice[Any, Any, Any], value: Iterable[Any], /) -> None",
+      "concise_description": "Cannot set item in `list[Any]`",
       "severity": "error"
     },
     {
-      "line": 399,
-      "column": 29,
-      "stop_line": 399,
-      "stop_column": 40,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 388,
+      "column": 17,
+      "stop_line": 388,
+      "stop_column": 49,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `scores_batch` with type `list[Tensor | None]` in function `otx.data.entity.validation.validate_scores`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `scores_batch` with type `list[Tensor | None]` in function `otx.data.entity.validation.validate_scores`",
+      "name": "unsupported-operation",
+      "description": "Cannot set item in `str`\n  Object of class `str` has no attribute `__setitem__`\n  Did you mean `__getitem__`?",
+      "concise_description": "Cannot set item in `str`",
       "severity": "error"
     },
     {
-      "line": 401,
-      "column": 38,
-      "stop_line": 401,
-      "stop_column": 57,
-      "path": "src/otx/data/entity/sample.py",
+      "line": 388,
+      "column": 17,
+      "stop_line": 388,
+      "stop_column": 49,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `feature_vector_batch` with type `list[Tensor | ndarray[Unknown, Unknown] | None]` in function `otx.data.entity.validation.validate_feature_vectors`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `feature_vector_batch` with type `list[Tensor | ndarray[Unknown, Unknown] | None]` in function `otx.data.entity.validation.validate_feature_vectors`",
+      "name": "unsupported-operation",
+      "description": "Cannot set item in `None`\n  Object of class `NoneType` has no attribute `__setitem__`",
+      "concise_description": "Cannot set item in `None`",
       "severity": "error"
     },
     {
-      "line": 109,
+      "line": 390,
       "column": 17,
-      "stop_line": 109,
-      "stop_column": 47,
-      "path": "src/otx/data/entity/tile.py",
+      "stop_line": 390,
+      "stop_column": 49,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Image]` is not assignable to parameter `tensor_list` with type `list[Tensor]` in function `otx.data.entity.utils.stack_batch`",
-      "concise_description": "Argument `list[Image]` is not assignable to parameter `tensor_list` with type `list[Tensor]` in function `otx.data.entity.utils.stack_batch`",
+      "name": "unsupported-operation",
+      "description": "Cannot set item in `list[Any]`\n  No matching overload found for function `list.__setitem__` called with arguments: (Literal['input_size'], Any)\n  Possible overloads:\n  (key: SupportsIndex, value: Any, /) -> None [closest match]\n  (key: slice[Any, Any, Any], value: Iterable[Any], /) -> None",
+      "concise_description": "Cannot set item in `list[Any]`",
       "severity": "error"
     },
     {
-      "line": 141,
-      "column": 25,
-      "stop_line": 141,
-      "stop_column": 113,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 390,
+      "column": 17,
+      "stop_line": 390,
+      "stop_column": 49,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[list[Image | Tensor | ndarray[Unknown, Unknown] | Any]]` is not assignable to parameter `batch_tiles` with type `list[list[Image]]` in function `TileBatchDetDataEntity.__init__`",
-      "concise_description": "Argument `list[list[Image | Tensor | ndarray[Unknown, Unknown] | Any]]` is not assignable to parameter `batch_tiles` with type `list[list[Image]]` in function `TileBatchDetDataEntity.__init__`",
+      "name": "unsupported-operation",
+      "description": "Cannot set item in `str`\n  Object of class `str` has no attribute `__setitem__`\n  Did you mean `__getitem__`?",
+      "concise_description": "Cannot set item in `str`",
       "severity": "error"
     },
     {
-      "line": 146,
-      "column": 18,
-      "stop_line": 146,
-      "stop_column": 29,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 390,
+      "column": 17,
+      "stop_line": 390,
+      "stop_column": 49,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `tile`",
-      "concise_description": "Object of class `OTXSample` has no attribute `tile`",
+      "name": "unsupported-operation",
+      "description": "Cannot set item in `None`\n  Object of class `NoneType` has no attribute `__setitem__`",
+      "concise_description": "Cannot set item in `None`",
       "severity": "error"
     },
     {
-      "line": 188,
-      "column": 9,
-      "stop_line": 188,
-      "stop_column": 15,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 390,
+      "column": 68,
+      "stop_line": 390,
+      "stop_column": 80,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "bad-override",
-      "description": "Class member `TileBatchInstSegDataEntity.unbind` overrides parent class `OTXTileBatchDataEntity` in an inconsistent manner\n  `TileBatchInstSegDataEntity.unbind` has type `BoundMethod[TileBatchInstSegDataEntity, (self: TileBatchInstSegDataEntity) -> list[tuple[list[dict[str, int | str]], OTXSampleBatch]]]`, which is not assignable to `BoundMethod[TileBatchInstSegDataEntity, (self: TileBatchInstSegDataEntity) -> list[tuple[list[TileInfo], OTXSampleBatch]]]`, the type of `OTXTileBatchDataEntity.unbind`",
-      "concise_description": "Class member `TileBatchInstSegDataEntity.unbind` overrides parent class `OTXTileBatchDataEntity` in an inconsistent manner",
+      "name": "bad-argument-type",
+      "description": "Argument `Literal['input_size']` is not assignable to parameter `index` with type `int` in function `omegaconf.listconfig.ListConfig.get`",
+      "concise_description": "Argument `Literal['input_size']` is not assignable to parameter `index` with type `int` in function `omegaconf.listconfig.ListConfig.get`",
       "severity": "error"
     },
     {
-      "line": 198,
-      "column": 28,
-      "stop_line": 198,
-      "stop_column": 58,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 392,
+      "column": 16,
+      "stop_line": 392,
+      "stop_column": 48,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Image]` is not assignable to parameter `images` with type `Tensor | list[Tensor]` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "concise_description": "Argument `list[Image]` is not assignable to parameter `images` with type `Tensor | list[Tensor]` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
+      "name": "bad-index",
+      "description": "Cannot index into `list[Any]`\n  No matching overload found for function `list.__getitem__` called with arguments: (Literal['input_size'])\n  Possible overloads:\n  (i: SupportsIndex, /) -> Any [closest match]\n  (s: slice[Any, Any, Any], /) -> list[Any]",
+      "concise_description": "Cannot index into `list[Any]`",
       "severity": "error"
     },
     {
-      "line": 204,
-      "column": 20,
-      "stop_line": 204,
-      "stop_column": 41,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 392,
+      "column": 16,
+      "stop_line": 392,
+      "stop_column": 48,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `list.__init__` called with arguments: (list[tuple[list[TileInfo], OTXSampleBatch]])\n  Possible overloads:\n  () -> None\n  (iterable: Iterable[tuple[list[dict[str, int | str]], OTXSampleBatch]], /) -> None [closest match]",
-      "concise_description": "No matching overload found for function `list.__init__` called with arguments: (list[tuple[list[TileInfo], OTXSampleBatch]])",
+      "name": "bad-index",
+      "description": "Cannot index into `str`\n  No matching overload found for function `str.__getitem__` called with arguments: (Literal['input_size'])\n  Possible overloads:\n  (key: SupportsIndex | slice[Any, Any, Any], /) -> LiteralString\n  (key: SupportsIndex | slice[Any, Any, Any], /) -> str [closest match]",
+      "concise_description": "Cannot index into `str`",
       "severity": "error"
     },
     {
-      "line": 224,
-      "column": 25,
-      "stop_line": 224,
-      "stop_column": 113,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 392,
+      "column": 16,
+      "stop_line": 392,
+      "stop_column": 48,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[list[Image | Tensor | ndarray[Unknown, Unknown] | Any]]` is not assignable to parameter `batch_tiles` with type `list[list[Image]]` in function `TileBatchInstSegDataEntity.__init__`",
-      "concise_description": "Argument `list[list[Image | Tensor | ndarray[Unknown, Unknown] | Any]]` is not assignable to parameter `batch_tiles` with type `list[list[Image]]` in function `TileBatchInstSegDataEntity.__init__`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 230,
-      "column": 18,
-      "stop_line": 230,
-      "stop_column": 29,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 395,
+      "column": 49,
+      "stop_line": 395,
+      "stop_column": 67,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `tile`",
-      "concise_description": "Object of class `OTXSample` has no attribute `tile`",
+      "name": "bad-unpacking",
+      "description": "Expected argument after ** to be a mapping, got: dict[DictKeyType, Any] | list[Any] | str | Any | None in function `otx.config.data.SubsetConfig.__init__`",
+      "concise_description": "Expected argument after ** to be a mapping, got: dict[DictKeyType, Any] | list[Any] | str | Any | None in function `otx.config.data.SubsetConfig.__init__`",
       "severity": "error"
     },
     {
-      "line": 265,
-      "column": 9,
-      "stop_line": 265,
-      "stop_column": 15,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 510,
+      "column": 16,
+      "stop_line": 510,
+      "stop_column": 18,
+      "path": "src/otx/data/module.py",
       "code": -2,
-      "name": "bad-override",
-      "description": "Class member `TileBatchSegDataEntity.unbind` overrides parent class `OTXTileBatchDataEntity` in an inconsistent manner\n  `TileBatchSegDataEntity.unbind` has type `BoundMethod[TileBatchSegDataEntity, (self: TileBatchSegDataEntity) -> list[tuple[list[dict[str, int | str]], OTXSampleBatch]]]`, which is not assignable to `BoundMethod[TileBatchSegDataEntity, (self: TileBatchSegDataEntity) -> list[tuple[list[TileInfo], OTXSampleBatch]]]`, the type of `OTXTileBatchDataEntity.unbind`",
-      "concise_description": "Class member `TileBatchSegDataEntity.unbind` overrides parent class `OTXTileBatchDataEntity` in an inconsistent manner",
+      "name": "bad-return",
+      "description": "Returned type `lightning.fabric.utilities.data.AttributeDict` is not assignable to declared return type `lightning.pytorch.utilities.parsing.AttributeDict`",
+      "concise_description": "Returned type `lightning.fabric.utilities.data.AttributeDict` is not assignable to declared return type `lightning.pytorch.utilities.parsing.AttributeDict`",
       "severity": "error"
     },
     {
-      "line": 275,
-      "column": 28,
-      "stop_line": 275,
-      "stop_column": 103,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 51,
+      "column": 20,
+      "stop_line": 51,
+      "stop_column": 80,
+      "path": "src/otx/data/utils/structures/bbox/transforms.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `((...) -> Any) | ((ParamSpec(@_)) -> @_) | Unknown` is not assignable to parameter `images` with type `Tensor | list[Tensor]` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "concise_description": "Argument `((...) -> Any) | ((ParamSpec(@_)) -> @_) | Unknown` is not assignable to parameter `images` with type `Tensor | list[Tensor]` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
+      "name": "bad-assignment",
+      "description": "`Tensor` is not assignable to variable `scale_factor` with type `list[float]`",
+      "concise_description": "`Tensor` is not assignable to variable `scale_factor` with type `list[float]`",
       "severity": "error"
     },
     {
-      "line": 275,
-      "column": 44,
-      "stop_line": 275,
-      "stop_column": 87,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 52,
+      "column": 12,
+      "stop_line": 52,
+      "stop_column": 32,
+      "path": "src/otx/data/utils/structures/bbox/transforms.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "bad-return",
+      "description": "Returned type `list[float]` is not assignable to declared return type `Tensor`",
+      "concise_description": "Returned type `list[float]` is not assignable to declared return type `Tensor`",
       "severity": "error"
     },
     {
-      "line": 275,
-      "column": 44,
-      "stop_line": 275,
-      "stop_column": 87,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 46,
+      "column": 23,
+      "stop_line": 53,
+      "stop_column": 6,
+      "path": "src/otx/data/utils/structures/mask/mask_target.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `map.__new__` called with arguments: (type[map[_S]], (pos_proposals: Tensor, pos_assigned_gt_inds: Tensor, gt_masks: Mask, mask_size: list[int], meta_info: dict[Unknown, Unknown]) -> Tensor, list[Tensor], list[Tensor], list[Mask] | list[ndarray[Unknown, Unknown]], list[int], list[dict[Unknown, Unknown]])\n  Possible overloads:\n  (cls: type[map[_S]], func: (_T1) -> _S, iterable: Iterable[_T1], /) -> map[_S]\n  (cls: type[map[_S]], func: (_T1, _T2) -> _S, iterable: Iterable[_T1], iter2: Iterable[_T2], /) -> map[_S]\n  (cls: type[map[_S]], func: (_T1, _T2, _T3) -> _S, iterable: Iterable[_T1], iter2: Iterable[_T2], iter3: Iterable[_T3], /) -> map[_S]\n  (cls: type[map[_S]], func: (_T1, _T2, _T3, _T4) -> _S, iterable: Iterable[_T1], iter2: Iterable[_T2], iter3: Iterable[_T3], iter4: Iterable[_T4], /) -> map[_S]\n  (cls: type[map[_S]], func: (_T1, _T2, _T3, _T4, _T5) -> _S, iterable: Iterable[_T1], iter2: Iterable[_T2], iter3: Iterable[_T3], iter4: Iterable[_T4], iter5: Iterable[_T5], /) -> map[_S] [closest match]\n  (cls: type[map[_S]], func: (...) -> _S, iterable: Iterable[Any], iter2: Iterable[Any], iter3: Iterable[Any], iter4: Iterable[Any], iter5: Iterable[Any], iter6: Iterable[Any], /, *iterables: Iterable[Any]) -> map[_S]",
+      "concise_description": "No matching overload found for function `map.__new__` called with arguments: (type[map[_S]], (pos_proposals: Tensor, pos_assigned_gt_inds: Tensor, gt_masks: Mask, mask_size: list[int], meta_info: dict[Unknown, Unknown]) -> Tensor, list[Tensor], list[Tensor], list[Mask] | list[ndarray[Unknown, Unknown]], list[int], list[dict[Unknown, Unknown]])",
       "severity": "error"
     },
     {
-      "line": 275,
-      "column": 56,
-      "stop_line": 275,
-      "stop_column": 86,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 57,
+      "column": 12,
+      "stop_line": 57,
+      "stop_column": 25,
+      "path": "src/otx/data/utils/structures/mask/mask_target.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Image]` is not assignable to parameter `tensors` with type `list[Tensor] | tuple[Tensor, ...] | None` in function `torch._C._VariableFunctions.stack`",
-      "concise_description": "Argument `list[Image]` is not assignable to parameter `tensors` with type `list[Tensor] | tuple[Tensor, ...] | None` in function `torch._C._VariableFunctions.stack`",
+      "name": "bad-return",
+      "description": "Returned type `Tensor | list[Unknown]` is not assignable to declared return type `Tensor`",
+      "concise_description": "Returned type `Tensor | list[Unknown]` is not assignable to declared return type `Tensor`",
       "severity": "error"
     },
     {
-      "line": 275,
-      "column": 89,
-      "stop_line": 275,
-      "stop_column": 93,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 79,
+      "column": 17,
+      "stop_line": 79,
+      "stop_column": 33,
+      "path": "src/otx/data/utils/structures/mask/mask_target.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "bad-assignment",
+      "description": "`tuple[Unknown, ...]` is not assignable to variable `mask_size` with type `list[int]`",
+      "concise_description": "`tuple[Unknown, ...]` is not assignable to variable `mask_size` with type `list[int]`",
       "severity": "error"
     },
     {
-      "line": 275,
-      "column": 89,
-      "stop_line": 275,
-      "stop_column": 93,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 95,
+      "column": 32,
+      "stop_line": 95,
+      "stop_column": 66,
+      "path": "src/otx/data/utils/structures/mask/mask_target.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
+      "name": "bad-assignment",
+      "description": "`ndarray[Unknown, Unknown]` is not assignable to variable `pos_assigned_gt_inds` with type `Tensor`",
+      "concise_description": "`ndarray[Unknown, Unknown]` is not assignable to variable `pos_assigned_gt_inds` with type `Tensor`",
       "severity": "error"
     },
     {
-      "line": 277,
-      "column": 27,
-      "stop_line": 277,
-      "stop_column": 83,
-      "path": "src/otx/data/entity/tile.py",
+      "line": 101,
+      "column": 18,
+      "stop_line": 101,
+      "stop_column": 38,
+      "path": "src/otx/data/utils/structures/mask/mask_target.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `masks` with type `list[Mask] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `masks` with type `list[Mask] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
+      "description": "Argument `Tensor` is not assignable to parameter `inds` with type `ndarray[Unknown, Unknown]` in function `otx.data.utils.structures.mask.mask_util.crop_and_resize_masks`",
+      "concise_description": "Argument `Tensor` is not assignable to parameter `inds` with type `ndarray[Unknown, Unknown]` in function `otx.data.utils.structures.mask.mask_util.crop_and_resize_masks`",
       "severity": "error"
     },
     {
-      "line": 282,
+      "line": 102,
       "column": 20,
-      "stop_line": 282,
-      "stop_column": 41,
-      "path": "src/otx/data/entity/tile.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `list.__init__` called with arguments: (list[tuple[list[TileInfo], OTXSampleBatch]])\n  Possible overloads:\n  () -> None\n  (iterable: Iterable[tuple[list[dict[str, int | str]], OTXSampleBatch]], /) -> None [closest match]",
-      "concise_description": "No matching overload found for function `list.__init__` called with arguments: (list[tuple[list[TileInfo], OTXSampleBatch]])",
-      "severity": "error"
-    },
-    {
-      "line": 302,
-      "column": 25,
-      "stop_line": 302,
-      "stop_column": 113,
-      "path": "src/otx/data/entity/tile.py",
+      "stop_line": 102,
+      "stop_column": 26,
+      "path": "src/otx/data/utils/structures/mask/mask_target.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[list[Image | Tensor | ndarray[Unknown, Unknown] | Any]]` is not assignable to parameter `batch_tiles` with type `list[list[Image]]` in function `TileBatchSegDataEntity.__init__`",
-      "concise_description": "Argument `list[list[Image | Tensor | ndarray[Unknown, Unknown] | Any]]` is not assignable to parameter `batch_tiles` with type `list[list[Image]]` in function `TileBatchSegDataEntity.__init__`",
+      "description": "Argument `device` is not assignable to parameter `device` with type `str` in function `otx.data.utils.structures.mask.mask_util.crop_and_resize_masks`",
+      "concise_description": "Argument `device` is not assignable to parameter `device` with type `str` in function `otx.data.utils.structures.mask.mask_util.crop_and_resize_masks`",
       "severity": "error"
     },
     {
-      "line": 307,
+      "line": 111,
       "column": 18,
-      "stop_line": 307,
-      "stop_column": 29,
-      "path": "src/otx/data/entity/tile.py",
+      "stop_line": 111,
+      "stop_column": 60,
+      "path": "src/otx/data/utils/structures/mask/mask_util.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `tile`",
-      "concise_description": "Object of class `OTXSample` has no attribute `tile`",
+      "name": "bad-assignment",
+      "description": "`Tensor` is not assignable to variable `bboxes` with type `ndarray[Unknown, Unknown]`",
+      "concise_description": "`Tensor` is not assignable to variable `bboxes` with type `ndarray[Unknown, Unknown]`",
       "severity": "error"
     },
     {
-      "line": 33,
-      "column": 48,
-      "stop_line": 33,
-      "stop_column": 76,
-      "path": "src/otx/data/entity/utils.py",
+      "line": 113,
+      "column": 16,
+      "stop_line": 113,
+      "stop_column": 56,
+      "path": "src/otx/data/utils/structures/mask/mask_util.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `Image | Tensor | ndarray[Unknown, Unknown] | Any` is not assignable to parameter `subset` with type `Subset` in function `otx.data.entity.sample.OTXSample.__init__`",
-      "concise_description": "Unpacked keyword argument `Image | Tensor | ndarray[Unknown, Unknown] | Any` is not assignable to parameter `subset` with type `Subset` in function `otx.data.entity.sample.OTXSample.__init__`",
+      "name": "bad-assignment",
+      "description": "`Tensor` is not assignable to variable `inds` with type `ndarray[Unknown, Unknown]`",
+      "concise_description": "`Tensor` is not assignable to variable `inds` with type `ndarray[Unknown, Unknown]`",
       "severity": "error"
     },
     {
-      "line": 88,
-      "column": 26,
-      "stop_line": 88,
-      "stop_column": 46,
-      "path": "src/otx/data/entity/utils.py",
-      "code": -2,
-      "name": "bad-assignment",
-      "description": "`tuple[int, ...]` is not assignable to attribute `img_shape` with type `tuple[int, int]`",
-      "concise_description": "`tuple[int, ...]` is not assignable to attribute `img_shape` with type `tuple[int, int]`",
+      "line": 117,
+      "column": 21,
+      "stop_line": 117,
+      "stop_column": 49,
+      "path": "src/otx/data/utils/structures/mask/mask_util.py",
+      "code": -2,
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C._VariableFunctions.cat` called with arguments: (list[Tensor | ndarray[Unknown, Unknown]], dim=Literal[1])\n  Possible overloads:\n  (tensors: list[Tensor] | tuple[Tensor, ...] | None, dim: int = 0, *, out: Tensor | None = None) -> Tensor [closest match]\n  (tensors: list[Tensor] | tuple[Tensor, ...] | None, dim: EllipsisType | str | None, *, out: Tensor | None = None) -> Tensor",
+      "concise_description": "No matching overload found for function `torch._C._VariableFunctions.cat` called with arguments: (list[Tensor | ndarray[Unknown, Unknown]], dim=Literal[1])",
       "severity": "error"
     },
     {
-      "line": 91,
-      "column": 24,
-      "stop_line": 91,
-      "stop_column": 106,
-      "path": "src/otx/data/entity/utils.py",
+      "line": 120,
+      "column": 41,
+      "stop_line": 120,
+      "stop_column": 50,
+      "path": "src/otx/data/utils/structures/mask/mask_util.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`tuple[Tensor, Tensor, Tensor, Tensor]` is not assignable to attribute `padding` with type `tuple[int, int, int, int]`",
-      "concise_description": "`tuple[Tensor, Tensor, Tensor, Tensor]` is not assignable to attribute `padding` with type `tuple[int, int, int, int]`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.index_select` called with arguments: (Literal[0], ndarray[Unknown, Unknown])\n  Possible overloads:\n  (dim: int, index: Tensor) -> Tensor [closest match]\n  (dim: EllipsisType | str | None, index: Tensor) -> Tensor",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.index_select` called with arguments: (Literal[0], ndarray[Unknown, Unknown])",
       "severity": "error"
     },
     {
-      "line": 66,
-      "column": 43,
-      "stop_line": 66,
-      "stop_column": 52,
-      "path": "src/otx/data/factory.py",
+      "line": 121,
+      "column": 19,
+      "stop_line": 121,
+      "stop_column": 28,
+      "path": "src/otx/data/utils/structures/mask/mask_util.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
+      "name": "not-callable",
+      "description": "Expected a callable, got `str`",
+      "concise_description": "Expected a callable, got `str`",
       "severity": "error"
     },
     {
-      "line": 68,
-      "column": 44,
-      "stop_line": 68,
-      "stop_column": 59,
-      "path": "src/otx/data/factory.py",
+      "line": 125,
+      "column": 12,
+      "stop_line": 125,
+      "stop_column": 31,
+      "path": "src/otx/data/utils/structures/mask/mask_util.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.classification.OTXMulticlassClsDataset.__init__`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.classification.OTXMulticlassClsDataset.__init__`",
+      "name": "missing-attribute",
+      "description": "Object of class `bool` has no attribute `float`",
+      "concise_description": "Object of class `bool` has no attribute `float`",
       "severity": "error"
     },
     {
-      "line": 73,
-      "column": 43,
-      "stop_line": 73,
-      "stop_column": 52,
-      "path": "src/otx/data/factory.py",
+      "line": 37,
+      "column": 38,
+      "stop_line": 37,
+      "stop_column": 51,
+      "path": "src/otx/engine/__init__.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
+      "description": "Argument `type[Engine]` is not assignable to parameter `object` with type `type[OTXEngine] | type[OVEngine]` in function `list.append`",
+      "concise_description": "Argument `type[Engine]` is not assignable to parameter `object` with type `type[OTXEngine] | type[OVEngine]` in function `list.append`",
       "severity": "error"
     },
     {
-      "line": 75,
-      "column": 44,
-      "stop_line": 75,
-      "stop_column": 59,
-      "path": "src/otx/data/factory.py",
+      "line": 60,
+      "column": 16,
+      "stop_line": 60,
+      "stop_column": 32,
+      "path": "src/otx/metrics/accuracy.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.classification.OTXMultilabelClsDataset.__init__`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.classification.OTXMultilabelClsDataset.__init__`",
+      "name": "bad-return",
+      "description": "Returned type `Metric` is not assignable to declared return type `NamedConfusionMatrix`",
+      "concise_description": "Returned type `Metric` is not assignable to declared return type `NamedConfusionMatrix`",
       "severity": "error"
     },
     {
-      "line": 80,
-      "column": 43,
-      "stop_line": 80,
-      "stop_column": 52,
-      "path": "src/otx/data/factory.py",
+      "line": 104,
+      "column": 9,
+      "stop_line": 104,
+      "stop_column": 15,
+      "path": "src/otx/metrics/accuracy.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
+      "name": "bad-override",
+      "description": "Class member `AccuracywithLabelGroup.update` overrides parent class `Metric` in an inconsistent manner\n  `AccuracywithLabelGroup.update` has type `BoundMethod[AccuracywithLabelGroup, (self: AccuracywithLabelGroup, preds: Tensor, target: Tensor) -> None]`, which is not assignable to `BoundMethod[AccuracywithLabelGroup, (self: AccuracywithLabelGroup, *_: Any, **__: Any) -> None]`, the type of `Metric.update`",
+      "concise_description": "Class member `AccuracywithLabelGroup.update` overrides parent class `Metric` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 82,
-      "column": 40,
-      "stop_line": 82,
-      "stop_column": 55,
-      "path": "src/otx/data/factory.py",
+      "line": 131,
+      "column": 67,
+      "stop_line": 131,
+      "stop_column": 80,
+      "path": "src/otx/metrics/accuracy.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.classification.OTXHlabelClsDataset.__init__`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.classification.OTXHlabelClsDataset.__init__`",
+      "description": "Argument `list[NamedConfusionMatrix]` is not assignable to parameter `conf_matrices` with type `Tensor` in function `AccuracywithLabelGroup._compute_accuracy_from_conf_matrices`",
+      "concise_description": "Argument `list[NamedConfusionMatrix]` is not assignable to parameter `conf_matrices` with type `Tensor` in function `AccuracywithLabelGroup._compute_accuracy_from_conf_matrices`",
       "severity": "error"
     },
     {
-      "line": 87,
-      "column": 43,
-      "stop_line": 87,
-      "stop_column": 52,
-      "path": "src/otx/data/factory.py",
+      "line": 284,
+      "column": 63,
+      "stop_line": 290,
+      "stop_column": 10,
+      "path": "src/otx/metrics/accuracy.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
+      "name": "bad-assignment",
+      "description": "`list[Metric]` is not assignable to attribute `multiclass_head_accuracy` with type `list[Accuracy]`",
+      "concise_description": "`list[Metric]` is not assignable to attribute `multiclass_head_accuracy` with type `list[Accuracy]`",
       "severity": "error"
     },
     {
-      "line": 89,
-      "column": 40,
-      "stop_line": 89,
-      "stop_column": 55,
-      "path": "src/otx/data/factory.py",
+      "line": 306,
+      "column": 41,
+      "stop_line": 312,
+      "stop_column": 10,
+      "path": "src/otx/metrics/accuracy.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.detection.OTXDetectionDataset.__init__`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.detection.OTXDetectionDataset.__init__`",
+      "name": "bad-assignment",
+      "description": "`list[Module]` is not assignable to attribute `multiclass_head_accuracy` with type `list[Accuracy]`",
+      "concise_description": "`list[Module]` is not assignable to attribute `multiclass_head_accuracy` with type `list[Accuracy]`",
       "severity": "error"
     },
     {
-      "line": 94,
-      "column": 43,
-      "stop_line": 94,
-      "stop_column": 52,
-      "path": "src/otx/data/factory.py",
+      "line": 317,
+      "column": 9,
+      "stop_line": 317,
+      "stop_column": 15,
+      "path": "src/otx/metrics/accuracy.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
+      "name": "bad-override",
+      "description": "Class member `MixedHLabelAccuracy.update` overrides parent class `Metric` in an inconsistent manner\n  `MixedHLabelAccuracy.update` has type `BoundMethod[MixedHLabelAccuracy, (self: MixedHLabelAccuracy, preds: Tensor, target: Tensor) -> None]`, which is not assignable to `BoundMethod[MixedHLabelAccuracy, (self: MixedHLabelAccuracy, *_: Any, **__: Any) -> None]`, the type of `Metric.update`",
+      "concise_description": "Class member `MixedHLabelAccuracy.update` overrides parent class `Metric` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 96,
-      "column": 58,
-      "stop_line": 96,
+      "line": 343,
+      "column": 17,
+      "stop_line": 343,
       "stop_column": 73,
-      "path": "src/otx/data/factory.py",
+      "path": "src/otx/metrics/accuracy.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.instance_segmentation.OTXInstanceSegDataset.__init__`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.instance_segmentation.OTXInstanceSegDataset.__init__`",
+      "description": "Argument `list[None]` is not assignable to parameter `tensors` with type `list[Tensor] | tuple[Tensor, ...] | None` in function `torch._C._VariableFunctions.stack`",
+      "concise_description": "Argument `list[None]` is not assignable to parameter `tensors` with type `list[Tensor] | tuple[Tensor, ...] | None` in function `torch._C._VariableFunctions.stack`",
       "severity": "error"
     },
     {
-      "line": 101,
-      "column": 43,
-      "stop_line": 101,
-      "stop_column": 52,
-      "path": "src/otx/data/factory.py",
+      "line": 680,
+      "column": 9,
+      "stop_line": 680,
+      "stop_column": 15,
+      "path": "src/otx/metrics/fmeasure.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
+      "name": "bad-override",
+      "description": "Class member `FMeasure.update` overrides parent class `Metric` in an inconsistent manner\n  `FMeasure.update` has type `BoundMethod[FMeasure, (self: FMeasure, preds: list[dict[str, Tensor]], target: list[dict[str, Tensor]]) -> None]`, which is not assignable to `BoundMethod[FMeasure, (self: FMeasure, *_: Any, **__: Any) -> None]`, the type of `Metric.update`",
+      "concise_description": "Class member `FMeasure.update` overrides parent class `Metric` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 103,
-      "column": 43,
-      "stop_line": 103,
-      "stop_column": 58,
-      "path": "src/otx/data/factory.py",
+      "line": 79,
+      "column": 5,
+      "stop_line": 79,
+      "stop_column": 22,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.segmentation.OTXSegmentationDataset.__init__`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.segmentation.OTXSegmentationDataset.__init__`",
+      "name": "bad-override",
+      "description": "Class member `LeafAccuracy.full_state_update` overrides parent class `Metric` in an inconsistent manner\n  `LeafAccuracy.full_state_update` has type `bool`, which is not consistent with `bool | None` in `Metric.full_state_update` (the type of read-write attributes cannot be changed)",
+      "concise_description": "Class member `LeafAccuracy.full_state_update` overrides parent class `Metric` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 108,
-      "column": 43,
-      "stop_line": 108,
-      "stop_column": 52,
-      "path": "src/otx/data/factory.py",
+      "line": 105,
+      "column": 13,
+      "stop_line": 105,
+      "stop_column": 38,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `legacy_dataset` with type `datumaro.components.dataset.Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 110,
-      "column": 48,
-      "stop_line": 110,
-      "stop_column": 63,
-      "path": "src/otx/data/factory.py",
+      "line": 105,
+      "column": 13,
+      "stop_line": 105,
+      "stop_column": 38,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.keypoint_detection.OTXKeypointDetectionDataset.__init__`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset | datumaro.experimental.dataset.Dataset[Unknown]` is not assignable to parameter `dm_subset` with type `datumaro.experimental.dataset.Dataset[Unknown]` in function `otx.data.dataset.keypoint_detection.OTXKeypointDetectionDataset.__init__`",
+      "name": "unsupported-operation",
+      "description": "Cannot set item in `Module`\n  Expected `__setitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot set item in `Module`",
       "severity": "error"
     },
     {
-      "line": 114,
-      "column": 17,
-      "stop_line": 114,
-      "stop_column": 33,
-      "path": "src/otx/data/module.py",
+      "line": 106,
+      "column": 13,
+      "stop_line": 106,
+      "stop_column": 40,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str | None` is not assignable to parameter `data_format` with type `str` in function `otx.data.utils.pre_filtering.pre_filtering`",
-      "concise_description": "Argument `str | None` is not assignable to parameter `data_format` with type `str` in function `otx.data.utils.pre_filtering.pre_filtering`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 131,
-      "column": 57,
-      "stop_line": 131,
-      "stop_column": 64,
-      "path": "src/otx/data/module.py",
+      "line": 106,
+      "column": 13,
+      "stop_line": 106,
+      "stop_column": 40,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `datumaro.components.dataset.Dataset` is not assignable to parameter `dataset` with type `torch.utils.data.dataset.Dataset[Unknown]` in function `otx.data.utils.utils.adapt_tile_config`",
-      "concise_description": "Argument `datumaro.components.dataset.Dataset` is not assignable to parameter `dataset` with type `torch.utils.data.dataset.Dataset[Unknown]` in function `otx.data.utils.utils.adapt_tile_config`",
+      "name": "unsupported-operation",
+      "description": "Cannot set item in `Module`\n  Expected `__setitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot set item in `Module`",
       "severity": "error"
     },
     {
-      "line": 221,
-      "column": 26,
-      "stop_line": 221,
-      "stop_column": 45,
-      "path": "src/otx/data/module.py",
+      "line": 110,
+      "column": 17,
+      "stop_line": 110,
+      "stop_column": 48,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "not-iterable",
-      "description": "Type `Compose` is not iterable\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Type `Compose` is not iterable",
+      "name": "not-callable",
+      "description": "Expected a callable, got `Tensor`",
+      "concise_description": "Expected a callable, got `Tensor`",
       "severity": "error"
     },
     {
-      "line": 342,
-      "column": 31,
-      "stop_line": 342,
-      "stop_column": 59,
-      "path": "src/otx/data/module.py",
+      "line": 111,
+      "column": 25,
+      "stop_line": 111,
+      "stop_column": 71,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`list[int] | tuple[int, ...] | Unknown` is not assignable to attribute `input_size` with type `tuple[int, int] | None`",
-      "concise_description": "`list[int] | tuple[int, ...] | Unknown` is not assignable to attribute `input_size` with type `tuple[int, int] | None`",
+      "name": "unsupported-operation",
+      "description": "`/` is not supported between `Module` and `Tensor`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._tensor.Tensor.__rdiv__`",
+      "concise_description": "`/` is not supported between `Module` and `Tensor`",
       "severity": "error"
     },
     {
-      "line": 428,
-      "column": 33,
-      "stop_line": 428,
-      "stop_column": 55,
-      "path": "src/otx/data/module.py",
+      "line": 118,
+      "column": 5,
+      "stop_line": 118,
+      "stop_column": 22,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `get`\nObject of class `list` has no attribute `get`\nObject of class `str` has no attribute `get`",
-      "concise_description": "Object of class `NoneType` has no attribute `get`\nObject of class `list` has no attribute `get`\nObject of class `str` has no attribute `get`",
+      "name": "bad-override",
+      "description": "Class member `FullPathAccuracy.full_state_update` overrides parent class `Metric` in an inconsistent manner\n  `FullPathAccuracy.full_state_update` has type `bool`, which is not consistent with `bool | None` in `Metric.full_state_update` (the type of read-write attributes cannot be changed)",
+      "concise_description": "Class member `FullPathAccuracy.full_state_update` overrides parent class `Metric` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 430,
-      "column": 17,
-      "stop_line": 430,
-      "stop_column": 49,
-      "path": "src/otx/data/module.py",
+      "line": 130,
+      "column": 9,
+      "stop_line": 130,
+      "stop_column": 38,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "Cannot set item in `list[Any]`\n  No matching overload found for function `list.__setitem__` called with arguments: (Literal['input_size'], tuple[int, int])\n  Possible overloads:\n  (key: SupportsIndex, value: Any, /) -> None [closest match]\n  (key: slice[Any, Any, Any], value: Iterable[Any], /) -> None",
-      "concise_description": "Cannot set item in `list[Any]`",
+      "description": "`+=` is not supported between `Module` and `Tensor`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._C.TensorBase.__radd__`",
+      "concise_description": "`+=` is not supported between `Module` and `Tensor`",
       "severity": "error"
     },
     {
-      "line": 430,
-      "column": 17,
-      "stop_line": 430,
-      "stop_column": 49,
-      "path": "src/otx/data/module.py",
+      "line": 131,
+      "column": 9,
+      "stop_line": 131,
+      "stop_column": 36,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "Cannot set item in `str`\n  Object of class `str` has no attribute `__setitem__`\n  Did you mean `__getitem__`?",
-      "concise_description": "Cannot set item in `str`",
+      "description": "`+=` is not supported between `Module` and `int`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__radd__`",
+      "concise_description": "`+=` is not supported between `Module` and `int`",
       "severity": "error"
     },
     {
-      "line": 430,
-      "column": 17,
-      "stop_line": 430,
-      "stop_column": 49,
-      "path": "src/otx/data/module.py",
+      "line": 135,
+      "column": 16,
+      "stop_line": 135,
+      "stop_column": 70,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "Cannot set item in `None`\n  Object of class `NoneType` has no attribute `__setitem__`",
-      "concise_description": "Cannot set item in `None`",
+      "description": "`/` is not supported between `Module` and `Tensor`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._tensor.Tensor.__rdiv__`",
+      "concise_description": "`/` is not supported between `Module` and `Tensor`",
       "severity": "error"
     },
     {
-      "line": 432,
-      "column": 17,
-      "stop_line": 432,
-      "stop_column": 49,
-      "path": "src/otx/data/module.py",
+      "line": 135,
+      "column": 39,
+      "stop_line": 135,
+      "stop_column": 59,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "Cannot set item in `list[Any]`\n  No matching overload found for function `list.__setitem__` called with arguments: (Literal['input_size'], Any)\n  Possible overloads:\n  (key: SupportsIndex, value: Any, /) -> None [closest match]\n  (key: slice[Any, Any, Any], value: Iterable[Any], /) -> None",
-      "concise_description": "Cannot set item in `list[Any]`",
+      "name": "not-callable",
+      "description": "Expected a callable, got `Tensor`",
+      "concise_description": "Expected a callable, got `Tensor`",
       "severity": "error"
     },
     {
-      "line": 432,
-      "column": 17,
-      "stop_line": 432,
-      "stop_column": 49,
-      "path": "src/otx/data/module.py",
+      "line": 141,
+      "column": 5,
+      "stop_line": 141,
+      "stop_column": 22,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "Cannot set item in `str`\n  Object of class `str` has no attribute `__setitem__`\n  Did you mean `__getitem__`?",
-      "concise_description": "Cannot set item in `str`",
+      "name": "bad-override",
+      "description": "Class member `InconsistentPathRatio.full_state_update` overrides parent class `Metric` in an inconsistent manner\n  `InconsistentPathRatio.full_state_update` has type `bool`, which is not consistent with `bool | None` in `Metric.full_state_update` (the type of read-write attributes cannot be changed)",
+      "concise_description": "Class member `InconsistentPathRatio.full_state_update` overrides parent class `Metric` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 432,
+      "line": 164,
       "column": 17,
-      "stop_line": 432,
-      "stop_column": 49,
-      "path": "src/otx/data/module.py",
+      "stop_line": 164,
+      "stop_column": 34,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
       "name": "unsupported-operation",
-      "description": "Cannot set item in `None`\n  Object of class `NoneType` has no attribute `__setitem__`",
-      "concise_description": "Cannot set item in `None`",
+      "description": "`+=` is not supported between `Module` and `Literal[1]`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__radd__`",
+      "concise_description": "`+=` is not supported between `Module` and `Literal[1]`",
       "severity": "error"
     },
     {
-      "line": 432,
-      "column": 68,
-      "stop_line": 432,
-      "stop_column": 80,
-      "path": "src/otx/data/module.py",
+      "line": 165,
+      "column": 9,
+      "stop_line": 165,
+      "stop_column": 24,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Literal['input_size']` is not assignable to parameter `index` with type `int` in function `omegaconf.listconfig.ListConfig.get`",
-      "concise_description": "Argument `Literal['input_size']` is not assignable to parameter `index` with type `int` in function `omegaconf.listconfig.ListConfig.get`",
+      "name": "unsupported-operation",
+      "description": "`+=` is not supported between `Module` and `int`\n  Argument `Module` is not assignable to parameter `value` with type `int` in function `int.__radd__`",
+      "concise_description": "`+=` is not supported between `Module` and `int`",
       "severity": "error"
     },
     {
-      "line": 434,
+      "line": 169,
       "column": 16,
-      "stop_line": 434,
-      "stop_column": 48,
-      "path": "src/otx/data/module.py",
+      "stop_line": 169,
+      "stop_column": 70,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `list[Any]`\n  No matching overload found for function `list.__getitem__` called with arguments: (Literal['input_size'])\n  Possible overloads:\n  (i: SupportsIndex, /) -> Any [closest match]\n  (s: slice[Any, Any, Any], /) -> list[Any]",
-      "concise_description": "Cannot index into `list[Any]`",
+      "name": "unsupported-operation",
+      "description": "`/` is not supported between `Module` and `Tensor`\n  Argument `Module` is not assignable to parameter `other` with type `Tensor | bool | complex | float | int` in function `torch._tensor.Tensor.__rdiv__`",
+      "concise_description": "`/` is not supported between `Module` and `Tensor`",
       "severity": "error"
     },
     {
-      "line": 434,
-      "column": 16,
-      "stop_line": 434,
-      "stop_column": 48,
-      "path": "src/otx/data/module.py",
+      "line": 169,
+      "column": 39,
+      "stop_line": 169,
+      "stop_column": 59,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `str`\n  No matching overload found for function `str.__getitem__` called with arguments: (Literal['input_size'])\n  Possible overloads:\n  (key: SupportsIndex | slice[Any, Any, Any], /) -> LiteralString\n  (key: SupportsIndex | slice[Any, Any, Any], /) -> str [closest match]",
-      "concise_description": "Cannot index into `str`",
+      "name": "not-callable",
+      "description": "Expected a callable, got `Tensor`",
+      "concise_description": "Expected a callable, got `Tensor`",
       "severity": "error"
     },
     {
-      "line": 434,
-      "column": 16,
-      "stop_line": 434,
-      "stop_column": 48,
-      "path": "src/otx/data/module.py",
+      "line": 179,
+      "column": 5,
+      "stop_line": 179,
+      "stop_column": 22,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
+      "name": "bad-override",
+      "description": "Class member `WeightedHierarchicalPrecision.full_state_update` overrides parent class `Metric` in an inconsistent manner\n  `WeightedHierarchicalPrecision.full_state_update` has type `bool`, which is not consistent with `bool | None` in `Metric.full_state_update` (the type of read-write attributes cannot be changed)",
+      "concise_description": "Class member `WeightedHierarchicalPrecision.full_state_update` overrides parent class `Metric` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 437,
-      "column": 49,
-      "stop_line": 437,
-      "stop_column": 67,
-      "path": "src/otx/data/module.py",
+      "line": 197,
+      "column": 13,
+      "stop_line": 197,
+      "stop_column": 26,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "bad-unpacking",
-      "description": "Expected argument after ** to be a mapping, got: dict[DictKeyType, Any] | list[Any] | str | Any | None in function `otx.config.data.SubsetConfig.__init__`",
-      "concise_description": "Expected argument after ** to be a mapping, got: dict[DictKeyType, Any] | list[Any] | str | Any | None in function `otx.config.data.SubsetConfig.__init__`",
+      "name": "not-callable",
+      "description": "Expected a callable, got `Tensor`",
+      "concise_description": "Expected a callable, got `Tensor`",
       "severity": "error"
     },
     {
-      "line": 438,
-      "column": 45,
-      "stop_line": 438,
-      "stop_column": 97,
-      "path": "src/otx/data/module.py",
+      "line": 203,
+      "column": 22,
+      "stop_line": 203,
+      "stop_column": 36,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`Compose` is not assignable to attribute `transforms` with type `list[dict[str, Any]]`",
-      "concise_description": "`Compose` is not assignable to attribute `transforms` with type `list[dict[str, Any]]`",
+      "name": "not-callable",
+      "description": "Expected a callable, got `Tensor`",
+      "concise_description": "Expected a callable, got `Tensor`",
       "severity": "error"
     },
     {
-      "line": 552,
-      "column": 16,
-      "stop_line": 552,
-      "stop_column": 18,
-      "path": "src/otx/data/module.py",
+      "line": 209,
+      "column": 13,
+      "stop_line": 209,
+      "stop_column": 25,
+      "path": "src/otx/metrics/hier_metric_collection.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `lightning.fabric.utilities.data.AttributeDict` is not assignable to declared return type `lightning.pytorch.utilities.parsing.AttributeDict`",
-      "concise_description": "Returned type `lightning.fabric.utilities.data.AttributeDict` is not assignable to declared return type `lightning.pytorch.utilities.parsing.AttributeDict`",
+      "name": "not-callable",
+      "description": "Expected a callable, got `Tensor`",
+      "concise_description": "Expected a callable, got `Tensor`",
       "severity": "error"
     },
     {
-      "line": 164,
-      "column": 27,
-      "stop_line": 164,
-      "stop_column": 52,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 72,
+      "column": 28,
+      "stop_line": 72,
+      "stop_column": 33,
+      "path": "src/otx/metrics/mean_ap.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to `ndarray[Unknown, Unknown]`",
-      "concise_description": "`list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to `ndarray[Unknown, Unknown]`",
+      "name": "unbound-name",
+      "description": "`masks` may be uninitialized",
+      "concise_description": "`masks` may be uninitialized",
       "severity": "error"
     },
     {
-      "line": 224,
-      "column": 28,
-      "stop_line": 224,
-      "stop_column": 40,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 45,
+      "column": 9,
+      "stop_line": 45,
+      "stop_column": 15,
+      "path": "src/otx/metrics/mlc_map.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `label`",
-      "concise_description": "Object of class `OTXSample` has no attribute `label`",
+      "name": "bad-override",
+      "description": "Class member `MultilabelmAP.update` overrides parent class `Metric` in an inconsistent manner\n  `MultilabelmAP.update` has type `BoundMethod[MultilabelmAP, (self: MultilabelmAP, preds: Tensor, target: Tensor) -> None]`, which is not assignable to `BoundMethod[MultilabelmAP, (self: MultilabelmAP, *_: Any, **__: Any) -> None]`, the type of `Metric.update`",
+      "concise_description": "Class member `MultilabelmAP.update` overrides parent class `Metric` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 322,
-      "column": 24,
-      "stop_line": 322,
-      "stop_column": 33,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 164,
+      "column": 9,
+      "stop_line": 164,
+      "stop_column": 15,
+      "path": "src/otx/metrics/pck.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `list` has no attribute `shape`",
-      "concise_description": "Object of class `list` has no attribute `shape`",
+      "name": "bad-override",
+      "description": "Class member `PCKMeasure.update` overrides parent class `Metric` in an inconsistent manner\n  `PCKMeasure.update` has type `BoundMethod[PCKMeasure, (self: PCKMeasure, preds: list[dict[str, Tensor]], target: list[dict[str, Tensor]]) -> None]`, which is not assignable to `BoundMethod[PCKMeasure, (self: PCKMeasure, *_: Any, **__: Any) -> None]`, the type of `Metric.update`",
+      "concise_description": "Class member `PCKMeasure.update` overrides parent class `Metric` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 324,
-      "column": 33,
-      "stop_line": 330,
+      "line": 168,
+      "column": 17,
+      "stop_line": 170,
       "stop_column": 18,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "path": "src/otx/metrics/pck.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `cv2.resize` called with arguments: (list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown], None, fx=float, fy=float, interpolation=int)\n  Possible overloads:\n  (src: MatLike, dsize: Sequence[int] | None, dst: Mat | ndarray[Any, dtype[floating[Any] | integer[Any]]] | None = ..., fx: float = ..., fy: float = ..., interpolation: int = ...) -> MatLike [closest match]\n  (src: UMat, dsize: Sequence[int] | None, dst: UMat | None = ..., fx: float = ..., fy: float = ..., interpolation: int = ...) -> UMat",
-      "concise_description": "No matching overload found for function `cv2.resize` called with arguments: (list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown], None, fx=float, fy=float, interpolation=int)",
+      "name": "bad-argument-type",
+      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `iterable` with type `Iterable[ndarray[Unknown, Unknown]]` in function `list.extend`",
+      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `iterable` with type `Iterable[ndarray[Unknown, Unknown]]` in function `list.extend`",
       "severity": "error"
     },
     {
-      "line": 334,
-      "column": 33,
-      "stop_line": 334,
-      "stop_column": 103,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 173,
+      "column": 17,
+      "stop_line": 175,
+      "stop_column": 18,
+      "path": "src/otx/metrics/pck.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `cv2.resize` called with arguments: (list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown], tuple[int, ...], interpolation=int)\n  Possible overloads:\n  (src: MatLike, dsize: Sequence[int] | None, dst: Mat | ndarray[Any, dtype[floating[Any] | integer[Any]]] | None = ..., fx: float = ..., fy: float = ..., interpolation: int = ...) -> MatLike [closest match]\n  (src: UMat, dsize: Sequence[int] | None, dst: UMat | None = ..., fx: float = ..., fy: float = ..., interpolation: int = ...) -> UMat",
-      "concise_description": "No matching overload found for function `cv2.resize` called with arguments: (list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown], tuple[int, ...], interpolation=int)",
+      "name": "bad-argument-type",
+      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `iterable` with type `Iterable[ndarray[Unknown, Unknown]]` in function `list.extend`",
+      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `iterable` with type `Iterable[ndarray[Unknown, Unknown]]` in function `list.extend`",
       "severity": "error"
     },
     {
-      "line": 620,
-      "column": 34,
-      "stop_line": 620,
-      "stop_column": 37,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 276,
+      "column": 56,
+      "stop_line": 276,
+      "stop_column": 66,
+      "path": "src/otx/tools/auto_configurator.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to parameter `img` with type `ndarray[Unknown, Unknown]` in function `RandomResizedCrop._crop_img`",
-      "concise_description": "Argument `list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to parameter `img` with type `ndarray[Unknown, Unknown]` in function `RandomResizedCrop._crop_img`",
+      "name": "unbound-name",
+      "description": "`datamodule` may be uninitialized",
+      "concise_description": "`datamodule` may be uninitialized",
       "severity": "error"
     },
     {
-      "line": 621,
-      "column": 66,
-      "stop_line": 621,
-      "stop_column": 75,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 356,
+      "column": 9,
+      "stop_line": 356,
+      "stop_column": 18,
+      "path": "src/otx/types/label.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `list` has no attribute `shape`",
-      "concise_description": "Object of class `list` has no attribute `shape`",
+      "name": "bad-param-name-override",
+      "description": "Class member `NullLabelInfo.from_json` overrides parent class `LabelInfo` in an inconsistent manner\n  Got parameter name `_`, expected `serialized`",
+      "concise_description": "Class member `NullLabelInfo.from_json` overrides parent class `LabelInfo` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 622,
-      "column": 29,
-      "stop_line": 627,
-      "stop_column": 14,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 57,
+      "column": 55,
+      "stop_line": 57,
+      "stop_column": 76,
+      "path": "tests/conftest.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `cv2.resize` called with arguments: (list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown], tuple[int, ...], dst=None, interpolation=int)\n  Possible overloads:\n  (src: MatLike, dsize: Sequence[int] | None, dst: Mat | ndarray[Any, dtype[floating[Any] | integer[Any]]] | None = ..., fx: float = ..., fy: float = ..., interpolation: int = ...) -> MatLike [closest match]\n  (src: UMat, dsize: Sequence[int] | None, dst: UMat | None = ..., fx: float = ..., fy: float = ..., interpolation: int = ...) -> UMat",
-      "concise_description": "No matching overload found for function `cv2.resize` called with arguments: (list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown], tuple[int, ...], dst=None, interpolation=int)",
+      "name": "bad-argument-type",
+      "description": "Argument `(values: list[Any], context: dict[str, Any]) -> MockSample` is not assignable to parameter `unflatten_fn` with type `(Iterable[Any], Any) -> Any` in function `torch.utils._pytree.register_pytree_node`",
+      "concise_description": "Argument `(values: list[Any], context: dict[str, Any]) -> MockSample` is not assignable to parameter `unflatten_fn` with type `(Iterable[Any], Any) -> Any` in function `torch.utils._pytree.register_pytree_node`",
       "severity": "error"
     },
     {
-      "line": 633,
-      "column": 40,
-      "stop_line": 633,
-      "stop_column": 45,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 101,
+      "column": 30,
+      "stop_line": 101,
+      "stop_column": 36,
+      "path": "tests/conftest.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to parameter `img` with type `ndarray[Unknown, Unknown]` in function `RandomResizedCrop._crop_img`",
-      "concise_description": "Argument `list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to parameter `img` with type `ndarray[Unknown, Unknown]` in function `RandomResizedCrop._crop_img`",
+      "name": "not-a-type",
+      "description": "Expected a type form, got instance of `Module[pytest]`",
+      "concise_description": "Expected a type form, got instance of `Module[pytest]`",
       "severity": "error"
     },
     {
-      "line": 634,
-      "column": 35,
-      "stop_line": 639,
-      "stop_column": 18,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 230,
+      "column": 32,
+      "stop_line": 230,
+      "stop_column": 83,
+      "path": "tests/conftest.py",
       "code": -2,
       "name": "no-matching-overload",
-      "description": "No matching overload found for function `cv2.resize` called with arguments: (list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown], tuple[int, ...], dst=None, interpolation=int)\n  Possible overloads:\n  (src: MatLike, dsize: Sequence[int] | None, dst: Mat | ndarray[Any, dtype[floating[Any] | integer[Any]]] | None = ..., fx: float = ..., fy: float = ..., interpolation: int = ...) -> MatLike [closest match]\n  (src: UMat, dsize: Sequence[int] | None, dst: UMat | None = ..., fx: float = ..., fy: float = ..., interpolation: int = ...) -> UMat",
-      "concise_description": "No matching overload found for function `cv2.resize` called with arguments: (list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown], tuple[int, ...], dst=None, interpolation=int)",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])",
       "severity": "error"
     },
     {
-      "line": 861,
-      "column": 45,
-      "stop_line": 861,
-      "stop_column": 59,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 254,
+      "column": 32,
+      "stop_line": 254,
+      "stop_column": 83,
+      "path": "tests/conftest.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`direction_list` may be uninitialized",
-      "concise_description": "`direction_list` may be uninitialized",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])",
       "severity": "error"
     },
     {
-      "line": 862,
-      "column": 47,
-      "stop_line": 862,
-      "stop_column": 61,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 278,
+      "column": 32,
+      "stop_line": 278,
+      "stop_column": 83,
+      "path": "tests/conftest.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`direction_list` may be uninitialized",
-      "concise_description": "`direction_list` may be uninitialized",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])",
       "severity": "error"
     },
     {
-      "line": 864,
-      "column": 27,
-      "stop_line": 864,
-      "stop_column": 41,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 295,
+      "column": 12,
+      "stop_line": 295,
+      "stop_column": 73,
+      "path": "tests/conftest.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`direction_list` may be uninitialized",
-      "concise_description": "`direction_list` may be uninitialized",
+      "name": "bad-return",
+      "description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[MockSample, OTXSampleBatch, OTXPredictionBatch]`",
+      "concise_description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[MockSample, OTXSampleBatch, OTXPredictionBatch]`",
       "severity": "error"
     },
     {
-      "line": 864,
-      "column": 45,
-      "stop_line": 864,
-      "stop_column": 54,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 303,
+      "column": 32,
+      "stop_line": 303,
+      "stop_column": 83,
+      "path": "tests/conftest.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`prob_list` may be uninitialized",
-      "concise_description": "`prob_list` may be uninitialized",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])",
       "severity": "error"
     },
     {
-      "line": 931,
-      "column": 16,
-      "stop_line": 931,
-      "stop_column": 45,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 304,
+      "column": 43,
+      "stop_line": 304,
+      "stop_column": 113,
+      "path": "tests/conftest.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `Tensor` is not assignable to declared return type `bool`",
-      "concise_description": "Returned type `Tensor` is not assignable to declared return type `bool`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xyxy'], canvas_size=tuple[Literal[10], Literal[10]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xyxy'], canvas_size=tuple[Literal[10], Literal[10]])",
       "severity": "error"
     },
     {
-      "line": 1013,
-      "column": 16,
-      "stop_line": 1025,
-      "stop_column": 10,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 327,
+      "column": 12,
+      "stop_line": 327,
+      "stop_column": 73,
+      "path": "tests/conftest.py",
       "code": -2,
       "name": "bad-return",
-      "description": "Returned type `tuple[bool, bool, bool, bool, bool, bool, float, float, float, float, ndarray[tuple[int, ...], dtype[signedinteger[_64Bit]]]]` is not assignable to declared return type `Sequence[float | int]`",
-      "concise_description": "Returned type `tuple[bool, bool, bool, bool, bool, bool, float, float, float, float, ndarray[tuple[int, ...], dtype[signedinteger[_64Bit]]]]` is not assignable to declared return type `Sequence[float | int]`",
+      "description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[tuple[Unknown, ...], MockSample, OTXSampleBatch]`",
+      "concise_description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[tuple[Unknown, ...], MockSample, OTXSampleBatch]`",
       "severity": "error"
     },
     {
-      "line": 1034,
-      "column": 19,
-      "stop_line": 1034,
-      "stop_column": 29,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 335,
+      "column": 32,
+      "stop_line": 335,
+      "stop_column": 83,
+      "path": "tests/conftest.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `list` has no attribute `astype`",
-      "concise_description": "Object of class `list` has no attribute `astype`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])",
       "severity": "error"
     },
     {
-      "line": 1228,
-      "column": 44,
-      "stop_line": 1228,
-      "stop_column": 47,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 336,
+      "column": 43,
+      "stop_line": 336,
+      "stop_column": 113,
+      "path": "tests/conftest.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to parameter `image` with type `ndarray[Unknown, Unknown]` in function `RandomAffine._warp_image`",
-      "concise_description": "Argument `list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to parameter `image` with type `ndarray[Unknown, Unknown]` in function `RandomAffine._warp_image`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xyxy'], canvas_size=tuple[Literal[10], Literal[10]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xyxy'], canvas_size=tuple[Literal[10], Literal[10]])",
       "severity": "error"
     },
     {
-      "line": 1285,
-      "column": 33,
-      "stop_line": 1285,
-      "stop_column": 46,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 363,
+      "column": 12,
+      "stop_line": 363,
+      "stop_column": 73,
+      "path": "tests/conftest.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `bboxes`",
-      "concise_description": "Object of class `OTXSample` has no attribute `bboxes`",
+      "name": "bad-return",
+      "description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[tuple[Unknown, ...], MockSample, OTXSampleBatch]`",
+      "concise_description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[tuple[Unknown, ...], MockSample, OTXSampleBatch]`",
       "severity": "error"
     },
     {
-      "line": 1294,
-      "column": 13,
-      "stop_line": 1294,
-      "stop_column": 26,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 371,
+      "column": 32,
+      "stop_line": 371,
+      "stop_column": 83,
+      "path": "tests/conftest.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `bboxes`",
-      "concise_description": "Object of class `OTXSample` has no attribute `bboxes`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])",
       "severity": "error"
     },
     {
-      "line": 1301,
-      "column": 16,
-      "stop_line": 1301,
-      "stop_column": 27,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 391,
+      "column": 12,
+      "stop_line": 391,
+      "stop_column": 73,
+      "path": "tests/conftest.py",
       "code": -2,
       "name": "bad-return",
-      "description": "Returned type `BoolTensor` is not assignable to declared return type `ndarray[Unknown, Unknown]`",
-      "concise_description": "Returned type `BoolTensor` is not assignable to declared return type `ndarray[Unknown, Unknown]`",
+      "description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[tuple[Unknown, ...], MockSample, OTXSampleBatch]`",
+      "concise_description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[tuple[Unknown, ...], MockSample, OTXSampleBatch]`",
       "severity": "error"
     },
     {
-      "line": 1339,
-      "column": 13,
-      "stop_line": 1339,
-      "stop_column": 25,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 489,
+      "column": 20,
+      "stop_line": 496,
+      "stop_column": 10,
+      "path": "tests/conftest.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `masks`",
-      "concise_description": "Object of class `OTXSample` has no attribute `masks`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
+      "concise_description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
       "severity": "error"
     },
     {
-      "line": 1386,
-      "column": 18,
-      "stop_line": 1386,
-      "stop_column": 31,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 23,
+      "column": 36,
+      "stop_line": 23,
+      "stop_column": 54,
+      "path": "tests/integration/api/test_engine_api.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `bboxes`",
-      "concise_description": "Object of class `OTXSample` has no attribute `bboxes`",
+      "description": "No attribute `RECIPE_LIST` in module `pytest`",
+      "concise_description": "No attribute `RECIPE_LIST` in module `pytest`",
       "severity": "error"
     },
     {
-      "line": 1389,
-      "column": 17,
-      "stop_line": 1389,
-      "stop_column": 29,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 78,
+      "column": 15,
+      "stop_line": 78,
+      "stop_column": 34,
+      "path": "tests/integration/api/test_engine_api.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `masks`",
-      "concise_description": "Object of class `OTXSample` has no attribute `masks`",
+      "name": "bad-argument-type",
+      "description": "Argument `Path | dict[str, Path]` is not assignable to parameter `model` with type `OTXModel | OVModel | Path | PathLike[Unknown] | str` in function `otx.engine.create_engine`",
+      "concise_description": "Argument `Path | dict[str, Path]` is not assignable to parameter `model` with type `OTXModel | OVModel | Path | PathLike[Unknown] | str` in function `otx.engine.create_engine`",
       "severity": "error"
     },
     {
-      "line": 1397,
-      "column": 9,
-      "stop_line": 1397,
-      "stop_column": 22,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 106,
+      "column": 32,
+      "stop_line": 106,
+      "stop_column": 59,
+      "path": "tests/integration/api/test_engine_api.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `bboxes`",
-      "concise_description": "Object of class `OTXSample` has no attribute `bboxes`",
+      "description": "Object of class `OTXSample` has no attribute `saliency_map`",
+      "concise_description": "Object of class `OTXSample` has no attribute `saliency_map`",
       "severity": "error"
     },
     {
-      "line": 1748,
-      "column": 24,
-      "stop_line": 1748,
-      "stop_column": 88,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 110,
+      "column": 36,
+      "stop_line": 110,
+      "stop_column": 59,
+      "path": "tests/integration/api/test_engine_api.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `min` called with arguments: (int, float)\n  Possible overloads:\n  (arg1: SupportsRichComparisonT, arg2: SupportsRichComparisonT, /, *_args: SupportsRichComparisonT, *, key: None = None) -> SupportsRichComparisonT [closest match]\n  (arg1: _T, arg2: _T, /, *_args: _T, *, key: (_T) -> SupportsRichComparison) -> _T\n  (iterable: Iterable[SupportsRichComparisonT], /, *, key: None = None) -> SupportsRichComparisonT\n  (iterable: Iterable[_T], /, *, key: (_T) -> SupportsRichComparison) -> _T\n  (iterable: Iterable[SupportsRichComparisonT], /, *, key: None = None, default: _T) -> SupportsRichComparisonT | _T\n  (iterable: Iterable[_T1], /, *, key: (_T1) -> SupportsRichComparison, default: _T2) -> _T1 | _T2",
-      "concise_description": "No matching overload found for function `min` called with arguments: (int, float)",
+      "name": "missing-attribute",
+      "description": "No attribute `TILE_RECIPE_LIST` in module `pytest`",
+      "concise_description": "No attribute `TILE_RECIPE_LIST` in module `pytest`",
       "severity": "error"
     },
     {
-      "line": 1761,
-      "column": 24,
-      "stop_line": 1761,
-      "stop_column": 88,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 130,
+      "column": 35,
+      "stop_line": 130,
+      "stop_column": 41,
+      "path": "tests/integration/api/test_engine_api.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `min` called with arguments: (int, float)\n  Possible overloads:\n  (arg1: SupportsRichComparisonT, arg2: SupportsRichComparisonT, /, *_args: SupportsRichComparisonT, *, key: None = None) -> SupportsRichComparisonT [closest match]\n  (arg1: _T, arg2: _T, /, *_args: _T, *, key: (_T) -> SupportsRichComparison) -> _T\n  (iterable: Iterable[SupportsRichComparisonT], /, *, key: None = None) -> SupportsRichComparisonT\n  (iterable: Iterable[_T], /, *, key: (_T) -> SupportsRichComparison) -> _T\n  (iterable: Iterable[SupportsRichComparisonT], /, *, key: None = None, default: _T) -> SupportsRichComparisonT | _T\n  (iterable: Iterable[_T1], /, *, key: (_T1) -> SupportsRichComparison, default: _T2) -> _T1 | _T2",
-      "concise_description": "No matching overload found for function `min` called with arguments: (int, float)",
+      "name": "unexpected-keyword",
+      "description": "Unexpected keyword argument `format` in function `datumaro.util.real_decorator`",
+      "concise_description": "Unexpected keyword argument `format` in function `datumaro.util.real_decorator`",
       "severity": "error"
     },
     {
-      "line": 1861,
-      "column": 16,
-      "stop_line": 1861,
-      "stop_column": 21,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 130,
+      "column": 78,
+      "stop_line": 130,
+      "stop_column": 88,
+      "path": "tests/integration/api/test_engine_api.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `signedinteger[_64Bit]` is not assignable to declared return type `int`",
-      "concise_description": "Returned type `signedinteger[_64Bit]` is not assignable to declared return type `int`",
+      "name": "unexpected-keyword",
+      "description": "Unexpected keyword argument `save_media` in function `datumaro.util.real_decorator`",
+      "concise_description": "Unexpected keyword argument `save_media` in function `datumaro.util.real_decorator`",
       "severity": "error"
     },
     {
-      "line": 2070,
-      "column": 27,
-      "stop_line": 2070,
-      "stop_column": 52,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 153,
+      "column": 58,
+      "stop_line": 153,
+      "stop_column": 82,
+      "path": "tests/integration/api/test_engine_api.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to `ndarray[Unknown, Unknown]`",
-      "concise_description": "`list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to `ndarray[Unknown, Unknown]`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tiler` has no attribute `tile_size`",
+      "concise_description": "Object of class `Tiler` has no attribute `tile_size`",
       "severity": "error"
     },
     {
-      "line": 2174,
-      "column": 27,
-      "stop_line": 2174,
-      "stop_column": 52,
-      "path": "src/otx/data/transform_libs/torchvision.py",
-      "code": -2,
-      "name": "bad-assignment",
-      "description": "`list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to `ndarray[Unknown, Unknown]`",
-      "concise_description": "`list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to `ndarray[Unknown, Unknown]`",
-      "severity": "error"
-    },
-    {
-      "line": 2185,
-      "column": 33,
-      "stop_line": 2185,
-      "stop_column": 37,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 154,
+      "column": 53,
+      "stop_line": 154,
+      "stop_column": 81,
+      "path": "tests/integration/api/test_engine_api.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`size` may be uninitialized",
-      "concise_description": "`size` may be uninitialized",
+      "name": "missing-attribute",
+      "description": "Object of class `Tiler` has no attribute `tiles_overlap`",
+      "concise_description": "Object of class `Tiler` has no attribute `tiles_overlap`",
       "severity": "error"
     },
     {
-      "line": 2186,
-      "column": 33,
-      "stop_line": 2186,
+      "line": 14,
+      "column": 19,
+      "stop_line": 14,
       "stop_column": 37,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "path": "tests/integration/api/test_xai.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`size` may be uninitialized",
-      "concise_description": "`size` may be uninitialized",
+      "name": "missing-attribute",
+      "description": "No attribute `RECIPE_LIST` in module `pytest`",
+      "concise_description": "No attribute `RECIPE_LIST` in module `pytest`",
       "severity": "error"
     },
     {
-      "line": 2194,
-      "column": 21,
-      "stop_line": 2194,
-      "stop_column": 25,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 70,
+      "column": 22,
+      "stop_line": 70,
+      "stop_column": 46,
+      "path": "tests/integration/api/test_xai.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`size` may be uninitialized",
-      "concise_description": "`size` may be uninitialized",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
+      "concise_description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
       "severity": "error"
     },
     {
-      "line": 2195,
-      "column": 22,
-      "stop_line": 2195,
-      "stop_column": 26,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 72,
+      "column": 20,
+      "stop_line": 72,
+      "stop_column": 47,
+      "path": "tests/integration/api/test_xai.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`size` may be uninitialized",
-      "concise_description": "`size` may be uninitialized",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 2539,
-      "column": 16,
-      "stop_line": 2539,
-      "stop_column": 34,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 72,
+      "column": 20,
+      "stop_line": 72,
+      "stop_column": 86,
+      "path": "tests/integration/api/test_xai.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `tuple[signedinteger[_64Bit], signedinteger[_64Bit]]` is not assignable to declared return type `tuple[int, int]`",
-      "concise_description": "Returned type `tuple[signedinteger[_64Bit], signedinteger[_64Bit]]` is not assignable to declared return type `tuple[int, int]`",
+      "name": "bad-argument-type",
+      "description": "Argument `Tensor | bool | Unknown` is not assignable to parameter `iterable` with type `Iterable[object]` in function `all`\n  Protocol `Iterable` requires attribute `__iter__`",
+      "concise_description": "Argument `Tensor | bool | Unknown` is not assignable to parameter `iterable` with type `Iterable[object]` in function `all`",
       "severity": "error"
     },
     {
-      "line": 2559,
-      "column": 20,
-      "stop_line": 2559,
-      "stop_column": 34,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 72,
+      "column": 51,
+      "stop_line": 72,
+      "stop_column": 86,
+      "path": "tests/integration/api/test_xai.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `tuple[signedinteger[_64Bit], signedinteger[_64Bit]]` is not assignable to declared return type `tuple[int, int]`",
-      "concise_description": "Returned type `tuple[signedinteger[_64Bit], signedinteger[_64Bit]]` is not assignable to declared return type `tuple[int, int]`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 2654,
-      "column": 54,
-      "stop_line": 2654,
-      "stop_column": 63,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 73,
+      "column": 20,
+      "stop_line": 73,
+      "stop_column": 47,
+      "path": "tests/integration/api/test_xai.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `(...) -> Unknown` is not assignable to parameter `transform` with type `Transform` in function `Compose._is_native_torchvision_transform`",
-      "concise_description": "Argument `(...) -> Unknown` is not assignable to parameter `transform` with type `Transform` in function `Compose._is_native_torchvision_transform`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 2656,
-      "column": 56,
-      "stop_line": 2656,
-      "stop_column": 65,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 73,
+      "column": 20,
+      "stop_line": 73,
+      "stop_column": 86,
+      "path": "tests/integration/api/test_xai.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `(...) -> Unknown` is not assignable to parameter `transform` with type `Transform` in function `Compose._apply_native_transform`",
-      "concise_description": "Argument `(...) -> Unknown` is not assignable to parameter `transform` with type `Transform` in function `Compose._apply_native_transform`",
+      "description": "Argument `Tensor | bool | Unknown` is not assignable to parameter `iterable` with type `Iterable[object]` in function `all`\n  Protocol `Iterable` requires attribute `__iter__`",
+      "concise_description": "Argument `Tensor | bool | Unknown` is not assignable to parameter `iterable` with type `Iterable[object]` in function `all`",
       "severity": "error"
     },
     {
-      "line": 2664,
-      "column": 16,
-      "stop_line": 2664,
-      "stop_column": 23,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 73,
+      "column": 51,
+      "stop_line": 73,
+      "stop_column": 86,
+      "path": "tests/integration/api/test_xai.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`outputs` may be uninitialized",
-      "concise_description": "`outputs` may be uninitialized",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 2814,
-      "column": 17,
-      "stop_line": 2814,
-      "stop_column": 32,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 183,
+      "column": 29,
+      "stop_line": 183,
+      "stop_column": 59,
+      "path": "tests/integration/api/test_xai.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`ndarray[tuple[int, ...], dtype[Any]]` is not assignable to variable `shift` with type `tuple[float, float]`",
-      "concise_description": "`ndarray[tuple[int, ...], dtype[Any]]` is not assignable to variable `shift` with type `tuple[float, float]`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `astype`\n  Did you mean `dtype`?",
+      "concise_description": "Object of class `Tensor` has no attribute `astype`",
       "severity": "error"
     },
     {
-      "line": 2840,
-      "column": 46,
-      "stop_line": 2840,
-      "stop_column": 80,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 183,
+      "column": 74,
+      "stop_line": 183,
+      "stop_column": 101,
+      "path": "tests/integration/api/test_xai.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `cv2.getAffineTransform` called with arguments: (floating[_32Bit], floating[_32Bit])\n  Possible overloads:\n  (src: MatLike, dst: MatLike) -> MatLike [closest match]\n  (src: UMat, dst: UMat) -> MatLike",
-      "concise_description": "No matching overload found for function `cv2.getAffineTransform` called with arguments: (floating[_32Bit], floating[_32Bit])",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `astype`\n  Did you mean `dtype`?",
+      "concise_description": "Object of class `Tensor` has no attribute `astype`",
       "severity": "error"
     },
     {
-      "line": 2842,
-      "column": 46,
-      "stop_line": 2842,
-      "stop_column": 80,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 137,
+      "column": 5,
+      "stop_line": 137,
+      "stop_column": 21,
+      "path": "tests/integration/conftest.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `cv2.getAffineTransform` called with arguments: (floating[_32Bit], floating[_32Bit])\n  Possible overloads:\n  (src: MatLike, dst: MatLike) -> MatLike [closest match]\n  (src: UMat, dst: UMat) -> MatLike",
-      "concise_description": "No matching overload found for function `cv2.getAffineTransform` called with arguments: (floating[_32Bit], floating[_32Bit])",
+      "name": "missing-attribute",
+      "description": "No attribute `TASK_LIST` in module `pytest`",
+      "concise_description": "No attribute `TASK_LIST` in module `pytest`",
       "severity": "error"
     },
     {
-      "line": 2851,
-      "column": 35,
-      "stop_line": 2851,
-      "stop_column": 53,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 138,
+      "column": 5,
+      "stop_line": 138,
+      "stop_column": 23,
+      "path": "tests/integration/conftest.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to `ndarray[Unknown, Unknown]`",
-      "concise_description": "`list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]` is not assignable to `ndarray[Unknown, Unknown]`",
+      "name": "missing-attribute",
+      "description": "No attribute `RECIPE_LIST` in module `pytest`",
+      "concise_description": "No attribute `RECIPE_LIST` in module `pytest`",
       "severity": "error"
     },
     {
-      "line": 2981,
-      "column": 54,
-      "stop_line": 2981,
-      "stop_column": 65,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 139,
+      "column": 5,
+      "stop_line": 139,
+      "stop_column": 26,
+      "path": "tests/integration/conftest.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`_input_size` may be uninitialized",
-      "concise_description": "`_input_size` may be uninitialized",
+      "name": "missing-attribute",
+      "description": "No attribute `RECIPE_OV_LIST` in module `pytest`",
+      "concise_description": "No attribute `RECIPE_OV_LIST` in module `pytest`",
       "severity": "error"
     },
     {
-      "line": 2983,
-      "column": 54,
-      "stop_line": 2983,
-      "stop_column": 65,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 140,
+      "column": 5,
+      "stop_line": 140,
+      "stop_column": 28,
+      "path": "tests/integration/conftest.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`_input_size` may be uninitialized",
-      "concise_description": "`_input_size` may be uninitialized",
+      "name": "missing-attribute",
+      "description": "No attribute `TILE_RECIPE_LIST` in module `pytest`",
+      "concise_description": "No attribute `TILE_RECIPE_LIST` in module `pytest`",
       "severity": "error"
     },
     {
-      "line": 2985,
-      "column": 29,
-      "stop_line": 2985,
-      "stop_column": 40,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 210,
+      "column": 23,
+      "stop_line": 210,
+      "stop_column": 33,
+      "path": "tests/perf_v2/benchmark.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`_input_size` may be uninitialized",
-      "concise_description": "`_input_size` may be uninitialized",
+      "name": "bad-argument-type",
+      "description": "Argument `Path | str | None` is not assignable to parameter `model` with type `OVModel | Path | PathLike[Unknown] | str` in function `otx.backend.openvino.engine.OVEngine.__init__`",
+      "concise_description": "Argument `Path | str | None` is not assignable to parameter `model` with type `OVModel | Path | PathLike[Unknown] | str` in function `otx.backend.openvino.engine.OVEngine.__init__`",
       "severity": "error"
     },
     {
-      "line": 2987,
-      "column": 54,
-      "stop_line": 2987,
-      "stop_column": 65,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 479,
+      "column": 25,
+      "stop_line": 479,
+      "stop_column": 47,
+      "path": "tests/perf_v2/benchmark.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`_input_size` may be uninitialized",
-      "concise_description": "`_input_size` may be uninitialized",
+      "name": "missing-attribute",
+      "description": "Object of class `NoneType` has no attribute `rglob`",
+      "concise_description": "Object of class `NoneType` has no attribute `rglob`",
       "severity": "error"
     },
     {
-      "line": 3027,
-      "column": 32,
-      "stop_line": 3027,
-      "stop_column": 45,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 601,
+      "column": 28,
+      "stop_line": 601,
+      "stop_column": 59,
+      "path": "tests/perf_v2/benchmark.py",
       "code": -2,
       "name": "bad-index",
-      "description": "Cannot index into `dict[type[Div] | type[Mult], ((a: Any, b: Any, /) -> Any)]`\n  Argument `type[operator]` is not assignable to parameter `key` with type `type[Div] | type[Mult]` in function `dict.__getitem__`",
-      "concise_description": "Cannot index into `dict[type[Div] | type[Mult], ((a: Any, b: Any, /) -> Any)]`",
+      "description": "Cannot index into `_LocIndexerFrame[DataFrame]`\n  No matching overload found for function `pandas.core.frame._LocIndexerFrame.__getitem__` called with arguments: (Hashable)\n  Possible overloads:\n  (idx: tuple[((DataFrame) -> ScalarT) | Timestamp | int | str | str_ | tuple[Scalar, ...], int | str | str_ | tuple[Scalar, ...]]) -> Scalar [closest match]\n  (idx: ((DataFrame) -> ScalarT) | tuple[((DataFrame) -> Index[Any] | Series[builtins.bool] | Series[int] | list[builtins.bool] | list[int] | list[HashableT] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | ndarray[tuple[int, ...], dtype[integer[Any]]] | slice[Any, Any, Any] | ScalarT) | Index[Any] | SequenceNotStr[Timestamp | float | str] | Series[builtins.bool] | Series[int] | list[builtins.bool] | list[int] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | ndarray[tuple[int, ...], dtype[integer[Any]]] | slice[Any, Any, Any] | tuple[IndexOpsMixin[Any, Any] | Sequence[Scalar] | Series[builtins.bool] | Timedelta | Timestamp | builtins.bool | bytes | complex | complexfloating[Any, Any] | date | datetime | datetime64[date | int | None] | float | floating[Any] | int | integer[Any] | list[builtins.bool] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | slice[Any, Any, Any] | str | timedelta | timedelta64[int | timedelta | None], ...], ScalarT | None] | None) -> Series[Any]\n  (idx: Scalar) -> DataFrame | Series[Any]\n  (idx: tuple[Scalar, slice[Any, Any, Any]]) -> DataFrame | Series[Any]\n  (key: ((DataFrame) -> Index[Any] | Sequence[Hashable] | Series[builtins.bool] | Series[int] | list[builtins.bool] | list[int] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | ndarray[tuple[int, ...], dtype[integer[Any]]] | slice[Any, Any, Any]) | Index[Any] | Series[builtins.bool] | Series[int] | list[builtins.bool] | list[int] | list[HashableT] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | ndarray[tuple[int, ...], dtype[integer[Any]]] | 
slice[Any, Any, Any] | tuple[((...) -> Unknown) | Index[Any] | Series[builtins.bool] | Series[int] | list[builtins.bool] | list[int] | list[HashableT] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | ndarray[tuple[int, ...], dtype[integer[Any]]] | slice[Any, Any, Any] | tuple[IndexOpsMixin[Any, Any] | Sequence[Scalar] | Series[builtins.bool] | Timedelta | Timestamp | builtins.bool | bytes | complex | complexfloating[Any, Any] | date | datetime | datetime64[date | int | None] | float | floating[Any] | int | integer[Any] | list[builtins.bool] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | slice[Any, Any, Any] | str | timedelta | timedelta64[int | timedelta | None], ...], ((...) -> Unknown) | Index[Any] | Iterable[HashableT] | Series[builtins.bool] | Series[int] | list[builtins.bool] | list[int] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | ndarray[tuple[int, ...], dtype[integer[Any]]] | slice[Any, Any, Any]]) -> DataFrame",
+      "concise_description": "Cannot index into `_LocIndexerFrame[DataFrame]`",
       "severity": "error"
     },
     {
-      "line": 3029,
-      "column": 95,
-      "stop_line": 3029,
-      "stop_column": 113,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 643,
+      "column": 13,
+      "stop_line": 643,
+      "stop_column": 120,
+      "path": "tests/perf_v2/benchmark.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `expr` has no attribute `value`",
-      "concise_description": "Object of class `expr` has no attribute `value`",
+      "name": "bad-argument-type",
+      "description": "Argument `bool | str` is not assignable to parameter `deterministic` with type `bool` in function `Benchmark.__init__`",
+      "concise_description": "Argument `bool | str` is not assignable to parameter `deterministic` with type `bool` in function `Benchmark.__init__`",
       "severity": "error"
     },
     {
-      "line": 3036,
-      "column": 26,
-      "stop_line": 3036,
-      "stop_column": 63,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 659,
+      "column": 16,
+      "stop_line": 659,
+      "stop_column": 22,
+      "path": "tests/perf_v2/benchmark.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `int` is not assignable to parameter `iterable` with type `Iterable[int]` in function `tuple.__new__`\n  Protocol `Iterable` requires attribute `__iter__`",
-      "concise_description": "Argument `int` is not assignable to parameter `iterable` with type `Iterable[int]` in function `tuple.__new__`",
+      "description": "Argument `DataFrame | None` is not assignable to parameter `result` with type `DataFrame` in function `Benchmark.check`",
+      "concise_description": "Argument `DataFrame | None` is not assignable to parameter `result` with type `DataFrame` in function `Benchmark.check`",
       "severity": "error"
     },
     {
-      "line": 3042,
-      "column": 57,
-      "stop_line": 3042,
-      "stop_column": 70,
-      "path": "src/otx/data/transform_libs/torchvision.py",
+      "line": 134,
+      "column": 34,
+      "stop_line": 134,
+      "stop_column": 83,
+      "path": "tests/perf_v2/summary.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DictConfig | dict[Unknown, Unknown]` is not assignable to parameter `init` with type `dict[str, Any]` in function `lightning.pytorch.cli.instantiate_class`",
-      "concise_description": "Argument `DictConfig | dict[Unknown, Unknown]` is not assignable to parameter `init` with type `dict[str, Any]` in function `lightning.pytorch.cli.instantiate_class`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `pandas.core.groupby.generic.DataFrameGroupBy.aggregate` called with arguments: (dict[str, list[str]])\n  Possible overloads:\n  (func: Literal['size']) -> Series[Any] [closest match]\n  (func: ((...) -> Unknown) | Mapping[Unknown, ((...) -> Unknown) | list[AggFuncTypeBase] | str | ufunc] | list[AggFuncTypeBase] | str | ufunc | None = ..., *args: Any, *, engine: WindowingEngine = ..., engine_kwargs: WindowingEngineKwargs = ..., **kwargs: Any) -> DataFrame\n  (func: ((...) -> Unknown) | Mapping[Unknown, ((...) -> Unknown) | list[AggFuncTypeBase] | str | ufunc] | list[AggFuncTypeBase] | str | ufunc | None = None, /, **kwargs: Any) -> DataFrame",
+      "concise_description": "No matching overload found for function `pandas.core.groupby.generic.DataFrameGroupBy.aggregate` called with arguments: (dict[str, list[str]])",
       "severity": "error"
     },
     {
-      "line": 73,
-      "column": 20,
-      "stop_line": 73,
-      "stop_column": 37,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 274,
+      "column": 34,
+      "stop_line": 274,
+      "stop_column": 83,
+      "path": "tests/perf_v2/summary.py",
       "code": -2,
-      "name": "not-callable",
-      "description": "Expected a callable, got `None`",
-      "concise_description": "Expected a callable, got `None`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `pandas.core.groupby.generic.DataFrameGroupBy.aggregate` called with arguments: (dict[str, list[str]])\n  Possible overloads:\n  (func: Literal['size']) -> Series[Any] [closest match]\n  (func: ((...) -> Unknown) | Mapping[Unknown, ((...) -> Unknown) | list[AggFuncTypeBase] | str | ufunc] | list[AggFuncTypeBase] | str | ufunc | None = ..., *args: Any, *, engine: WindowingEngine = ..., engine_kwargs: WindowingEngineKwargs = ..., **kwargs: Any) -> DataFrame\n  (func: ((...) -> Unknown) | Mapping[Unknown, ((...) -> Unknown) | list[AggFuncTypeBase] | str | ufunc] | list[AggFuncTypeBase] | str | ufunc | None = None, /, **kwargs: Any) -> DataFrame",
+      "concise_description": "No matching overload found for function `pandas.core.groupby.generic.DataFrameGroupBy.aggregate` called with arguments: (dict[str, list[str]])",
       "severity": "error"
     },
     {
-      "line": 74,
-      "column": 16,
-      "stop_line": 74,
-      "stop_column": 29,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 53,
+      "column": 33,
+      "stop_line": 53,
+      "stop_column": 39,
+      "path": "tests/regression/test_regression.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `cache_randomness` has no attribute `__name__`",
-      "concise_description": "Object of class `cache_randomness` has no attribute `__name__`",
+      "name": "bad-argument-type",
+      "description": "Argument `TempdirFactory` is not assignable to parameter `*args` with type `PathLike[str] | str` in function `pathlib.Path.__new__`",
+      "concise_description": "Argument `TempdirFactory` is not assignable to parameter `*args` with type `PathLike[str] | str` in function `pathlib.Path.__new__`",
       "severity": "error"
     },
     {
-      "line": 115,
-      "column": 16,
-      "stop_line": 115,
-      "stop_column": 30,
-      "path": "src/otx/data/transform_libs/utils.py",
-      "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `tuple[int, ...]` is not assignable to declared return type `tuple[int, int]`",
-      "concise_description": "Returned type `tuple[int, ...]` is not assignable to declared return type `tuple[int, int]`",
+      "line": 85,
+      "column": 9,
+      "stop_line": 85,
+      "stop_column": 20,
+      "path": "tests/test_helpers.py",
+      "code": -2,
+      "name": "bad-override",
+      "description": "Class member `CommonSemanticSegmentationExporter._apply_impl` overrides parent class `Exporter` in an inconsistent manner\n  `CommonSemanticSegmentationExporter._apply_impl` has type `BoundMethod[CommonSemanticSegmentationExporter, (self: CommonSemanticSegmentationExporter) -> None]`, which is not assignable to `BoundMethod[CommonSemanticSegmentationExporter, (self: CommonSemanticSegmentationExporter) -> Never]`, the type of `Exporter._apply_impl`",
+      "concise_description": "Class member `CommonSemanticSegmentationExporter._apply_impl` overrides parent class `Exporter` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 133,
-      "column": 16,
-      "stop_line": 133,
-      "stop_column": 47,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 100,
+      "column": 48,
+      "stop_line": 100,
+      "stop_column": 80,
+      "path": "tests/test_helpers.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `list[list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]]` is not assignable to declared return type `list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]`",
-      "concise_description": "Returned type `list[list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]]` is not assignable to declared return type `list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]`",
+      "name": "not-iterable",
+      "description": "Type `Categories` is not iterable",
+      "concise_description": "Type `Categories` is not iterable",
       "severity": "error"
     },
     {
-      "line": 162,
-      "column": 20,
-      "stop_line": 162,
-      "stop_column": 66,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 125,
+      "column": 52,
+      "stop_line": 125,
+      "stop_column": 60,
+      "path": "tests/test_helpers.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`Tensor` is not assignable to variable `scale_factor` with type `tuple[float, float]`",
-      "concise_description": "`Tensor` is not assignable to variable `scale_factor` with type `tuple[float, float]`",
+      "name": "bad-argument-type",
+      "description": "Argument `Path` is not assignable to parameter `save_dir` with type `str` in function `CommonSemanticSegmentationExporter._export_item_annotation`",
+      "concise_description": "Argument `Path` is not assignable to parameter `save_dir` with type `str` in function `CommonSemanticSegmentationExporter._export_item_annotation`",
       "severity": "error"
     },
     {
-      "line": 163,
-      "column": 12,
-      "stop_line": 163,
-      "stop_column": 32,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 137,
+      "column": 28,
+      "stop_line": 137,
+      "stop_column": 43,
+      "path": "tests/test_helpers.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `tuple[float, ...]` is not assignable to declared return type `Tensor`",
-      "concise_description": "Returned type `tuple[float, ...]` is not assignable to declared return type `Tensor`",
+      "name": "missing-attribute",
+      "description": "Object of class `NoneType` has no attribute `data`",
+      "concise_description": "Object of class `NoneType` has no attribute `data`",
       "severity": "error"
     },
     {
-      "line": 186,
-      "column": 20,
-      "stop_line": 186,
-      "stop_column": 89,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 144,
+      "column": 45,
+      "stop_line": 144,
+      "stop_column": 67,
+      "path": "tests/test_helpers.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `cv2.resize` called with arguments: (Any, tuple[float | int | tuple[int, int], ...] | tuple[int, ...], interpolation=int)\n  Possible overloads:\n  (src: MatLike, dsize: Sequence[int] | None, dst: Mat | ndarray[Any, dtype[floating[Any] | integer[Any]]] | None = ..., fx: float = ..., fy: float = ..., interpolation: int = ...) -> MatLike [closest match]\n  (src: UMat, dsize: Sequence[int] | None, dst: UMat | None = ..., fx: float = ..., fy: float = ..., interpolation: int = ...) -> UMat",
-      "concise_description": "No matching overload found for function `cv2.resize` called with arguments: (Any, tuple[float | int | tuple[int, int], ...] | tuple[int, ...], interpolation=int)",
+      "name": "bad-argument-type",
+      "description": "Argument `list[ndarray[tuple[int, ...], dtype[Any]]]` is not assignable to parameter `polygons` with type `ndarray[Unknown, Unknown]` in function `otx.data.utils.structures.mask.mask_util.polygon_to_bitmap`",
+      "concise_description": "Argument `list[ndarray[tuple[int, ...], dtype[Any]]]` is not assignable to parameter `polygons` with type `ndarray[Unknown, Unknown]` in function `otx.data.utils.structures.mask.mask_util.polygon_to_bitmap`",
       "severity": "error"
     },
     {
-      "line": 267,
-      "column": 37,
-      "stop_line": 267,
-      "stop_column": 45,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 144,
+      "column": 55,
+      "stop_line": 144,
+      "stop_column": 65,
+      "path": "tests/test_helpers.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`channels` may be uninitialized",
-      "concise_description": "`channels` may be uninitialized",
+      "name": "missing-attribute",
+      "description": "Object of class `Annotation` has no attribute `points`",
+      "concise_description": "Object of class `Annotation` has no attribute `points`",
       "severity": "error"
     },
     {
-      "line": 269,
-      "column": 61,
-      "stop_line": 269,
-      "stop_column": 69,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 145,
+      "column": 38,
+      "stop_line": 145,
+      "stop_column": 47,
+      "path": "tests/test_helpers.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`channels` may be uninitialized",
-      "concise_description": "`channels` may be uninitialized",
+      "name": "missing-attribute",
+      "description": "Object of class `Annotation` has no attribute `label`",
+      "concise_description": "Object of class `Annotation` has no attribute `label`",
       "severity": "error"
     },
     {
-      "line": 305,
-      "column": 39,
-      "stop_line": 305,
-      "stop_column": 66,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 147,
+      "column": 45,
+      "stop_line": 147,
+      "stop_column": 91,
+      "path": "tests/test_helpers.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[list[float | int] | list[int]]` is not assignable to parameter `value` with type `SupportsFloat | SupportsIndex | bytes | str | None` in function `numpy.floating.__init__`",
-      "concise_description": "Argument `list[list[float | int] | list[int]]` is not assignable to parameter `value` with type `SupportsFloat | SupportsIndex | bytes | str | None` in function `numpy.floating.__init__`",
+      "description": "Argument `list[ndarray[tuple[int, ...], dtype[Any]]]` is not assignable to parameter `polygons` with type `ndarray[Unknown, Unknown]` in function `otx.data.utils.structures.mask.mask_util.polygon_to_bitmap`",
+      "concise_description": "Argument `list[ndarray[tuple[int, ...], dtype[Any]]]` is not assignable to parameter `polygons` with type `ndarray[Unknown, Unknown]` in function `otx.data.utils.structures.mask.mask_util.polygon_to_bitmap`",
       "severity": "error"
     },
     {
-      "line": 307,
-      "column": 39,
-      "stop_line": 307,
-      "stop_column": 66,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 147,
+      "column": 63,
+      "stop_line": 147,
+      "stop_column": 77,
+      "path": "tests/test_helpers.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[list[float | int] | list[int]]` is not assignable to parameter `value` with type `SupportsFloat | SupportsIndex | bytes | str | None` in function `numpy.floating.__init__`",
-      "concise_description": "Argument `list[list[float | int] | list[int]]` is not assignable to parameter `value` with type `SupportsFloat | SupportsIndex | bytes | str | None` in function `numpy.floating.__init__`",
+      "name": "missing-attribute",
+      "description": "Object of class `Annotation` has no attribute `as_polygon`",
+      "concise_description": "Object of class `Annotation` has no attribute `as_polygon`",
       "severity": "error"
     },
     {
-      "line": 308,
-      "column": 12,
-      "stop_line": 308,
-      "stop_column": 28,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 148,
+      "column": 38,
+      "stop_line": 148,
+      "stop_column": 47,
+      "path": "tests/test_helpers.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `floating[_32Bit]` is not assignable to declared return type `ndarray[Unknown, Unknown]`",
-      "concise_description": "Returned type `floating[_32Bit]` is not assignable to declared return type `ndarray[Unknown, Unknown]`",
+      "name": "missing-attribute",
+      "description": "Object of class `Annotation` has no attribute `label`",
+      "concise_description": "Object of class `Annotation` has no attribute `label`",
       "severity": "error"
     },
     {
-      "line": 308,
-      "column": 12,
-      "stop_line": 308,
-      "stop_column": 28,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 150,
+      "column": 49,
+      "stop_line": 150,
+      "stop_column": 61,
+      "path": "tests/test_helpers.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`translate_matrix` may be uninitialized",
-      "concise_description": "`translate_matrix` may be uninitialized",
+      "name": "missing-attribute",
+      "description": "Object of class `Annotation` has no attribute `get_bbox`",
+      "concise_description": "Object of class `Annotation` has no attribute `get_bbox`",
       "severity": "error"
     },
     {
-      "line": 351,
-      "column": 13,
-      "stop_line": 354,
-      "stop_column": 53,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 151,
+      "column": 55,
+      "stop_line": 151,
+      "stop_column": 64,
+      "path": "tests/test_helpers.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `Tensor` is not assignable to declared return type `BoolTensor`",
-      "concise_description": "Returned type `Tensor` is not assignable to declared return type `BoolTensor`",
+      "name": "missing-attribute",
+      "description": "Object of class `Annotation` has no attribute `label`",
+      "concise_description": "Object of class `Annotation` has no attribute `label`",
       "severity": "error"
     },
     {
-      "line": 357,
-      "column": 9,
-      "stop_line": 360,
+      "line": 158,
+      "column": 15,
+      "stop_line": 158,
       "stop_column": 44,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "path": "tests/test_helpers.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `Tensor` is not assignable to declared return type `BoolTensor`",
-      "concise_description": "Returned type `Tensor` is not assignable to declared return type `BoolTensor`",
+      "name": "unsupported-operation",
+      "description": "`/` is not supported between `str` and `str`\n  Cannot find `__truediv__` or `__rtruediv__`",
+      "concise_description": "`/` is not supported between `str` and `str`",
       "severity": "error"
     },
     {
-      "line": 560,
-      "column": 11,
-      "stop_line": 560,
-      "stop_column": 34,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 23,
+      "column": 17,
+      "stop_line": 23,
+      "stop_column": 28,
+      "path": "tests/unit/backend/native/callbacks/test_gpu_mem_monitor.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`Tensor` is not assignable to variable `eps` with type `float`",
-      "concise_description": "`Tensor` is not assignable to variable `eps` with type `float`",
+      "name": "bad-override",
+      "description": "Class member `DebugLogger.log_metrics` overrides parent class `CSVLogger` in an inconsistent manner\n  `DebugLogger.log_metrics` has type `BoundMethod[TestGPUMemMonitor.test_gpu_monitor.DebugLogger, (self: TestGPUMemMonitor.test_gpu_monitor.DebugLogger, metrics: dict[str, float], step: int | None = None) -> None]`, which is not assignable to `BoundMethod[TestGPUMemMonitor.test_gpu_monitor.DebugLogger, (self: TestGPUMemMonitor.test_gpu_monitor.DebugLogger, metrics: dict[str, Tensor | float], step: int | None = None) -> None]`, the type of `CSVLogger.log_metrics`",
+      "concise_description": "Class member `DebugLogger.log_metrics` overrides parent class `CSVLogger` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 561,
-      "column": 22,
-      "stop_line": 561,
-      "stop_column": 34,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 16,
+      "column": 9,
+      "stop_line": 16,
+      "stop_column": 20,
+      "path": "tests/unit/backend/native/exporter/test_base.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C._VariableFunctions.max` called with arguments: (Tensor, float)\n  Possible overloads:\n  (input: Tensor, *, out: Tensor | None = None) -> Tensor\n  (input: Tensor, other: Tensor, *, out: Tensor | None = None) -> Tensor [closest match]\n  (input: Tensor, dim: int, keepdim: bool = False, *, out: Tensor | list[Tensor] | tuple[Tensor, ...] | None = None) -> max\n  (input: Tensor, dim: EllipsisType | str | None, keepdim: bool = False, *, out: Tensor | list[Tensor] | tuple[Tensor, ...] | None = None) -> max",
-      "concise_description": "No matching overload found for function `torch._C._VariableFunctions.max` called with arguments: (Tensor, float)",
+      "name": "bad-override",
+      "description": "Class member `MockModelExporter.to_openvino` overrides parent class `OTXModelExporter` in an inconsistent manner\n  `MockModelExporter.to_openvino` has type `BoundMethod[MockModelExporter, (self: MockModelExporter, model: Unknown, output_dir: Unknown, base_model_name: Unknown, precision: Unknown) -> Unknown]`, which is not assignable to `BoundMethod[MockModelExporter, (self: MockModelExporter, model: OTXModel, output_dir: Path, base_model_name: str = 'exported_model', precision: OTXPrecisionType = OTXPrecisionType.FP32) -> Path]`, the type of `OTXModelExporter.to_openvino`",
+      "concise_description": "Class member `MockModelExporter.to_openvino` overrides parent class `OTXModelExporter` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 566,
-      "column": 29,
-      "stop_line": 566,
-      "stop_column": 40,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 19,
+      "column": 9,
+      "stop_line": 19,
+      "stop_column": 16,
+      "path": "tests/unit/backend/native/exporter/test_base.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`enclosed_rb` may be uninitialized",
-      "concise_description": "`enclosed_rb` may be uninitialized",
+      "name": "bad-override",
+      "description": "Class member `MockModelExporter.to_onnx` overrides parent class `OTXModelExporter` in an inconsistent manner\n  `MockModelExporter.to_onnx` has type `BoundMethod[MockModelExporter, (self: MockModelExporter, model: Unknown, output_dir: Unknown, base_model_name: Unknown, precision: Unknown) -> Unknown]`, which is not assignable to `BoundMethod[MockModelExporter, (self: MockModelExporter, model: OTXModel, output_dir: Path, base_model_name: str = 'exported_model', precision: OTXPrecisionType = OTXPrecisionType.FP32, embed_metadata: bool = True) -> Path]`, the type of `OTXModelExporter.to_onnx`",
+      "concise_description": "Class member `MockModelExporter.to_onnx` overrides parent class `OTXModelExporter` in an inconsistent manner",
       "severity": "error"
     },
     {
-      "line": 566,
-      "column": 43,
-      "stop_line": 566,
-      "stop_column": 54,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 31,
+      "column": 62,
+      "stop_line": 31,
+      "stop_column": 67,
+      "path": "tests/unit/backend/native/lightning/strategies/test_strategies.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`enclosed_lt` may be uninitialized",
-      "concise_description": "`enclosed_lt` may be uninitialized",
+      "name": "bad-argument-type",
+      "description": "Argument `Literal['xpu']` is not assignable to parameter `accelerator` with type `Accelerator | None` in function `otx.backend.native.lightning.strategies.xpu_single.SingleXPUStrategy.__init__`",
+      "concise_description": "Argument `Literal['xpu']` is not assignable to parameter `accelerator` with type `Accelerator | None` in function `otx.backend.native.lightning.strategies.xpu_single.SingleXPUStrategy.__init__`",
       "severity": "error"
     },
     {
-      "line": 568,
-      "column": 29,
-      "stop_line": 568,
-      "stop_column": 48,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 27,
+      "column": 61,
+      "stop_line": 27,
+      "stop_column": 65,
+      "path": "tests/unit/backend/native/models/classification/backbones/test_otx_efficientnet.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C._VariableFunctions.max` called with arguments: (Tensor, float)\n  Possible overloads:\n  (input: Tensor, *, out: Tensor | None = None) -> Tensor\n  (input: Tensor, other: Tensor, *, out: Tensor | None = None) -> Tensor [closest match]\n  (input: Tensor, dim: int, keepdim: bool = False, *, out: Tensor | list[Tensor] | tuple[Tensor, ...] | None = None) -> max\n  (input: Tensor, dim: EllipsisType | str | None, keepdim: bool = False, *, out: Tensor | list[Tensor] | tuple[Tensor, ...] | None = None) -> max",
-      "concise_description": "No matching overload found for function `torch._C._VariableFunctions.max` called with arguments: (Tensor, float)",
+      "name": "bad-argument-type",
+      "description": "Argument `None` is not assignable to parameter `pretrained` with type `bool` in function `otx.backend.native.models.classification.backbones.efficientnet.EfficientNetBackbone.__new__`",
+      "concise_description": "Argument `None` is not assignable to parameter `pretrained` with type `bool` in function `otx.backend.native.models.classification.backbones.efficientnet.EfficientNetBackbone.__new__`",
       "severity": "error"
     },
     {
-      "line": 646,
-      "column": 35,
-      "stop_line": 646,
-      "stop_column": 47,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 33,
+      "column": 91,
+      "stop_line": 33,
+      "stop_column": 95,
+      "path": "tests/unit/backend/native/models/classification/backbones/test_otx_efficientnet.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`scale_factor` may be uninitialized",
-      "concise_description": "`scale_factor` may be uninitialized",
+      "name": "bad-argument-type",
+      "description": "Argument `None` is not assignable to parameter `pretrained` with type `bool` in function `otx.backend.native.models.classification.backbones.efficientnet.EfficientNetBackbone.__new__`",
+      "concise_description": "Argument `None` is not assignable to parameter `pretrained` with type `bool` in function `otx.backend.native.models.classification.backbones.efficientnet.EfficientNetBackbone.__new__`",
       "severity": "error"
     },
     {
-      "line": 649,
-      "column": 16,
-      "stop_line": 649,
-      "stop_column": 38,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 24,
+      "column": 23,
+      "stop_line": 24,
+      "stop_column": 53,
+      "path": "tests/unit/backend/native/models/classification/backbones/test_timm.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `tuple[tuple[int, int], float | int | tuple[float, float] | tuple[int, int] | Unknown]` is not assignable to declared return type `tuple[int, int] | tuple[tuple[int, int], float | int]`",
-      "concise_description": "Returned type `tuple[tuple[int, int], float | int | tuple[float, float] | tuple[int, int] | Unknown]` is not assignable to declared return type `tuple[int, int] | tuple[tuple[int, int], float | int]`",
+      "name": "bad-argument-type",
+      "description": "Argument `str | None` is not assignable to parameter `*args` with type `PathLike[str] | str` in function `pathlib.Path.__new__`",
+      "concise_description": "Argument `str | None` is not assignable to parameter `*args` with type `PathLike[str] | str` in function `pathlib.Path.__new__`",
       "severity": "error"
     },
     {
-      "line": 649,
-      "column": 26,
-      "stop_line": 649,
-      "stop_column": 38,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 71,
+      "column": 59,
+      "stop_line": 71,
+      "stop_column": 65,
+      "path": "tests/unit/backend/native/models/classification/backbones/test_vision_transformer.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`scale_factor` may be uninitialized",
-      "concise_description": "`scale_factor` may be uninitialized",
+      "name": "bad-argument-type",
+      "description": "Argument `str` is not assignable to parameter `peft` with type `Literal['dora', 'lora'] | None` in function `otx.backend.native.models.classification.backbones.vision_transformer.VisionTransformerBackbone.__init__`",
+      "concise_description": "Argument `str` is not assignable to parameter `peft` with type `Literal['dora', 'lora'] | None` in function `otx.backend.native.models.classification.backbones.vision_transformer.VisionTransformerBackbone.__init__`",
       "severity": "error"
     },
     {
-      "line": 669,
-      "column": 16,
-      "stop_line": 669,
-      "stop_column": 57,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 48,
+      "column": 20,
+      "stop_line": 52,
+      "stop_column": 10,
+      "path": "tests/unit/backend/native/models/classification/conftest.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `list[list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]]` is not assignable to declared return type `list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]`",
-      "concise_description": "Returned type `list[list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]]` is not assignable to declared return type `list[ndarray[Unknown, Unknown]] | ndarray[Unknown, Unknown]`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
+      "concise_description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
       "severity": "error"
     },
     {
-      "line": 767,
-      "column": 22,
-      "stop_line": 767,
-      "stop_column": 30,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 109,
+      "column": 20,
+      "stop_line": 116,
+      "stop_column": 10,
+      "path": "tests/unit/backend/native/models/classification/conftest.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.any` called with arguments: (axis=Literal[1])\n  Possible overloads:\n  () -> Tensor [closest match]\n  (dim: Size | list[int] | tuple[int, ...] | None = None, keepdim: bool = False) -> Tensor\n  (dim: int, keepdim: bool = False) -> Tensor\n  (dim: EllipsisType | str | None, keepdim: bool = False) -> Tensor",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.any` called with arguments: (axis=Literal[1])",
+      "name": "bad-argument-type",
+      "description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
+      "concise_description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
       "severity": "error"
     },
     {
-      "line": 768,
-      "column": 22,
-      "stop_line": 768,
-      "stop_column": 30,
-      "path": "src/otx/data/transform_libs/utils.py",
+      "line": 179,
+      "column": 20,
+      "stop_line": 183,
+      "stop_column": 10,
+      "path": "tests/unit/backend/native/models/classification/conftest.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.any` called with arguments: (axis=Literal[2])\n  Possible overloads:\n  () -> Tensor [closest match]\n  (dim: Size | list[int] | tuple[int, ...] | None = None, keepdim: bool = False) -> Tensor\n  (dim: int, keepdim: bool = False) -> Tensor\n  (dim: EllipsisType | str | None, keepdim: bool = False) -> Tensor",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.any` called with arguments: (axis=Literal[2])",
+      "name": "bad-argument-type",
+      "description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
+      "concise_description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
       "severity": "error"
     },
     {
-      "line": 26,
-      "column": 16,
-      "stop_line": 26,
-      "stop_column": 62,
-      "path": "src/otx/data/utils/pre_filtering.py",
+      "line": 217,
+      "column": 33,
+      "stop_line": 217,
+      "stop_column": 53,
+      "path": "tests/unit/backend/native/models/classification/conftest.py",
       "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Categories`\n  Object of class `Categories` has no attribute `__getitem__`",
-      "concise_description": "Cannot index into `Categories`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)",
       "severity": "error"
     },
     {
-      "line": 27,
-      "column": 12,
-      "stop_line": 27,
-      "stop_column": 54,
-      "path": "src/otx/data/utils/pre_filtering.py",
+      "line": 218,
+      "column": 27,
+      "stop_line": 218,
+      "stop_column": 82,
+      "path": "tests/unit/backend/native/models/classification/conftest.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `Categories` is not assignable to declared return type `list[str]`",
-      "concise_description": "Returned type `Categories` is not assignable to declared return type `list[str]`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])",
       "severity": "error"
     },
     {
-      "line": 51,
-      "column": 20,
-      "stop_line": 51,
-      "stop_column": 80,
-      "path": "src/otx/data/utils/structures/bbox/transforms.py",
+      "line": 31,
+      "column": 27,
+      "stop_line": 31,
+      "stop_column": 38,
+      "path": "tests/unit/backend/native/models/classification/heads/test_hlabel_cls_head.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`Tensor` is not assignable to variable `scale_factor` with type `list[float]`",
-      "concise_description": "`Tensor` is not assignable to variable `scale_factor` with type `list[float]`",
+      "name": "bad-argument-type",
+      "description": "Argument `tuple[Literal[24], Literal[24], Literal[3]]` is not assignable to parameter `ori_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
+      "concise_description": "Argument `tuple[Literal[24], Literal[24], Literal[3]]` is not assignable to parameter `ori_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
       "severity": "error"
     },
     {
-      "line": 52,
-      "column": 12,
-      "stop_line": 52,
-      "stop_column": 32,
-      "path": "src/otx/data/utils/structures/bbox/transforms.py",
+      "line": 32,
+      "column": 27,
+      "stop_line": 32,
+      "stop_column": 38,
+      "path": "tests/unit/backend/native/models/classification/heads/test_hlabel_cls_head.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `list[float]` is not assignable to declared return type `Tensor`",
-      "concise_description": "Returned type `list[float]` is not assignable to declared return type `Tensor`",
+      "name": "bad-argument-type",
+      "description": "Argument `tuple[Literal[24], Literal[24], Literal[3]]` is not assignable to parameter `img_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
+      "concise_description": "Argument `tuple[Literal[24], Literal[24], Literal[3]]` is not assignable to parameter `img_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
       "severity": "error"
     },
     {
-      "line": 46,
-      "column": 23,
-      "stop_line": 53,
+      "line": 17,
+      "column": 12,
+      "stop_line": 20,
       "stop_column": 6,
-      "path": "src/otx/data/utils/structures/mask/mask_target.py",
+      "path": "tests/unit/backend/native/models/classification/heads/test_multilabel_cls_head.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `map.__new__` called with arguments: (type[map[_S]], (pos_proposals: Tensor, pos_assigned_gt_inds: Tensor, gt_masks: Mask, mask_size: list[int], meta_info: dict[Unknown, Unknown]) -> Tensor, list[Tensor], list[Tensor], list[Mask] | list[ndarray[Unknown, Unknown]], list[int], list[dict[Unknown, Unknown]])\n  Possible overloads:\n  (cls: type[map[_S]], func: (_T1) -> _S, iterable: Iterable[_T1], /) -> map[_S]\n  (cls: type[map[_S]], func: (_T1, _T2) -> _S, iterable: Iterable[_T1], iter2: Iterable[_T2], /) -> map[_S]\n  (cls: type[map[_S]], func: (_T1, _T2, _T3) -> _S, iterable: Iterable[_T1], iter2: Iterable[_T2], iter3: Iterable[_T3], /) -> map[_S]\n  (cls: type[map[_S]], func: (_T1, _T2, _T3, _T4) -> _S, iterable: Iterable[_T1], iter2: Iterable[_T2], iter3: Iterable[_T3], iter4: Iterable[_T4], /) -> map[_S]\n  (cls: type[map[_S]], func: (_T1, _T2, _T3, _T4, _T5) -> _S, iterable: Iterable[_T1], iter2: Iterable[_T2], iter3: Iterable[_T3], iter4: Iterable[_T4], iter5: Iterable[_T5], /) -> map[_S] [closest match]\n  (cls: type[map[_S]], func: (...) -> _S, iterable: Iterable[Any], iter2: Iterable[Any], iter3: Iterable[Any], iter4: Iterable[Any], iter5: Iterable[Any], iter6: Iterable[Any], /, *iterables: Iterable[Any]) -> map[_S]",
-      "concise_description": "No matching overload found for function `map.__new__` called with arguments: (type[map[_S]], (pos_proposals: Tensor, pos_assigned_gt_inds: Tensor, gt_masks: Mask, mask_size: list[int], meta_info: dict[Unknown, Unknown]) -> Tensor, list[Tensor], list[Tensor], list[Mask] | list[ndarray[Unknown, Unknown]], list[int], list[dict[Unknown, Unknown]])",
+      "name": "bad-return",
+      "description": "Returned type `MultiLabelLinearClsHead` is not assignable to declared return type `None`",
+      "concise_description": "Returned type `MultiLabelLinearClsHead` is not assignable to declared return type `None`",
       "severity": "error"
     },
     {
-      "line": 57,
+      "line": 25,
       "column": 12,
-      "stop_line": 57,
-      "stop_column": 25,
-      "path": "src/otx/data/utils/structures/mask/mask_target.py",
+      "stop_line": 30,
+      "stop_column": 6,
+      "path": "tests/unit/backend/native/models/classification/heads/test_multilabel_cls_head.py",
       "code": -2,
       "name": "bad-return",
-      "description": "Returned type `Tensor | list[Unknown]` is not assignable to declared return type `Tensor`",
-      "concise_description": "Returned type `Tensor | list[Unknown]` is not assignable to declared return type `Tensor`",
+      "description": "Returned type `MultiLabelNonLinearClsHead` is not assignable to declared return type `None`",
+      "concise_description": "Returned type `MultiLabelNonLinearClsHead` is not assignable to declared return type `None`",
       "severity": "error"
     },
     {
-      "line": 79,
-      "column": 17,
-      "stop_line": 79,
-      "stop_column": 33,
-      "path": "src/otx/data/utils/structures/mask/mask_target.py",
+      "line": 38,
+      "column": 22,
+      "stop_line": 44,
+      "stop_column": 14,
+      "path": "tests/unit/backend/native/models/classification/heads/test_multilabel_cls_head.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`tuple[Unknown, ...]` is not assignable to variable `mask_size` with type `list[int]`",
-      "concise_description": "`tuple[Unknown, ...]` is not assignable to variable `mask_size` with type `list[int]`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, scale_factor=tuple[float, float], ori_shape=tuple[Literal[281], Literal[500]], img_shape=tuple[Literal[224], Literal[224]], ignored_labels=list[@_])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, scale_factor=tuple[float, float], ori_shape=tuple[Literal[281], Literal[500]], img_shape=tuple[Literal[224], Literal[224]], ignored_labels=list[@_])",
       "severity": "error"
     },
     {
-      "line": 95,
-      "column": 32,
-      "stop_line": 95,
-      "stop_column": 66,
-      "path": "src/otx/data/utils/structures/mask/mask_target.py",
+      "line": 55,
+      "column": 22,
+      "stop_line": 61,
+      "stop_column": 14,
+      "path": "tests/unit/backend/native/models/classification/heads/test_multilabel_cls_head.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`ndarray[Unknown, Unknown]` is not assignable to variable `pos_assigned_gt_inds` with type `Tensor`",
-      "concise_description": "`ndarray[Unknown, Unknown]` is not assignable to variable `pos_assigned_gt_inds` with type `Tensor`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, scale_factor=tuple[float, float], ori_shape=tuple[Literal[281], Literal[500]], img_shape=tuple[Literal[224], Literal[224]], ignored_labels=list[int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, scale_factor=tuple[float, float], ori_shape=tuple[Literal[281], Literal[500]], img_shape=tuple[Literal[224], Literal[224]], ignored_labels=list[int])",
       "severity": "error"
     },
     {
-      "line": 101,
-      "column": 18,
-      "stop_line": 101,
-      "stop_column": 38,
-      "path": "src/otx/data/utils/structures/mask/mask_target.py",
+      "line": 63,
+      "column": 16,
+      "stop_line": 63,
+      "stop_column": 44,
+      "path": "tests/unit/backend/native/models/classification/test_efficientnet.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `inds` with type `ndarray[Unknown, Unknown]` in function `otx.data.utils.structures.mask.mask_util.crop_and_resize_masks`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `inds` with type `ndarray[Unknown, Unknown]` in function `otx.data.utils.structures.mask.mask_util.crop_and_resize_masks`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `in_size`",
+      "concise_description": "Object of class `Tensor` has no attribute `in_size`",
       "severity": "error"
     },
     {
-      "line": 102,
-      "column": 20,
-      "stop_line": 102,
-      "stop_column": 26,
-      "path": "src/otx/data/utils/structures/mask/mask_target.py",
+      "line": 135,
+      "column": 16,
+      "stop_line": 135,
+      "stop_column": 44,
+      "path": "tests/unit/backend/native/models/classification/test_efficientnet.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `device` is not assignable to parameter `device` with type `str` in function `otx.data.utils.structures.mask.mask_util.crop_and_resize_masks`",
-      "concise_description": "Argument `device` is not assignable to parameter `device` with type `str` in function `otx.data.utils.structures.mask.mask_util.crop_and_resize_masks`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `in_size`",
+      "concise_description": "Object of class `Tensor` has no attribute `in_size`",
       "severity": "error"
     },
     {
-      "line": 111,
-      "column": 18,
-      "stop_line": 111,
-      "stop_column": 60,
-      "path": "src/otx/data/utils/structures/mask/mask_util.py",
+      "line": 207,
+      "column": 16,
+      "stop_line": 207,
+      "stop_column": 44,
+      "path": "tests/unit/backend/native/models/classification/test_efficientnet.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`Tensor` is not assignable to variable `bboxes` with type `ndarray[Unknown, Unknown]`",
-      "concise_description": "`Tensor` is not assignable to variable `bboxes` with type `ndarray[Unknown, Unknown]`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `in_size`",
+      "concise_description": "Object of class `Tensor` has no attribute `in_size`",
       "severity": "error"
     },
     {
-      "line": 113,
+      "line": 63,
       "column": 16,
-      "stop_line": 113,
-      "stop_column": 56,
-      "path": "src/otx/data/utils/structures/mask/mask_util.py",
+      "stop_line": 63,
+      "stop_column": 44,
+      "path": "tests/unit/backend/native/models/classification/test_mobilenet_v3.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`Tensor` is not assignable to variable `inds` with type `ndarray[Unknown, Unknown]`",
-      "concise_description": "`Tensor` is not assignable to variable `inds` with type `ndarray[Unknown, Unknown]`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `in_size`",
+      "concise_description": "Object of class `Tensor` has no attribute `in_size`",
       "severity": "error"
     },
     {
-      "line": 117,
-      "column": 21,
-      "stop_line": 117,
-      "stop_column": 49,
-      "path": "src/otx/data/utils/structures/mask/mask_util.py",
+      "line": 135,
+      "column": 16,
+      "stop_line": 135,
+      "stop_column": 44,
+      "path": "tests/unit/backend/native/models/classification/test_mobilenet_v3.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C._VariableFunctions.cat` called with arguments: (list[Tensor | ndarray[Unknown, Unknown]], dim=Literal[1])\n  Possible overloads:\n  (tensors: list[Tensor] | tuple[Tensor, ...] | None, dim: int = 0, *, out: Tensor | None = None) -> Tensor [closest match]\n  (tensors: list[Tensor] | tuple[Tensor, ...] | None, dim: EllipsisType | str | None, *, out: Tensor | None = None) -> Tensor",
-      "concise_description": "No matching overload found for function `torch._C._VariableFunctions.cat` called with arguments: (list[Tensor | ndarray[Unknown, Unknown]], dim=Literal[1])",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `in_size`",
+      "concise_description": "Object of class `Tensor` has no attribute `in_size`",
       "severity": "error"
     },
     {
-      "line": 120,
-      "column": 41,
-      "stop_line": 120,
-      "stop_column": 50,
-      "path": "src/otx/data/utils/structures/mask/mask_util.py",
+      "line": 207,
+      "column": 16,
+      "stop_line": 207,
+      "stop_column": 44,
+      "path": "tests/unit/backend/native/models/classification/test_mobilenet_v3.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.index_select` called with arguments: (Literal[0], ndarray[Unknown, Unknown])\n  Possible overloads:\n  (dim: int, index: Tensor) -> Tensor [closest match]\n  (dim: EllipsisType | str | None, index: Tensor) -> Tensor",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.index_select` called with arguments: (Literal[0], ndarray[Unknown, Unknown])",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `in_size`",
+      "concise_description": "Object of class `Tensor` has no attribute `in_size`",
       "severity": "error"
     },
     {
-      "line": 121,
-      "column": 19,
-      "stop_line": 121,
-      "stop_column": 28,
-      "path": "src/otx/data/utils/structures/mask/mask_util.py",
+      "line": 111,
+      "column": 20,
+      "stop_line": 111,
+      "stop_column": 45,
+      "path": "tests/unit/backend/native/models/classification/test_torchvision_model.py",
       "code": -2,
-      "name": "not-callable",
-      "description": "Expected a callable, got `str`",
-      "concise_description": "Expected a callable, got `str`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 125,
-      "column": 12,
-      "stop_line": 125,
-      "stop_column": 31,
-      "path": "src/otx/data/utils/structures/mask/mask_util.py",
+      "line": 112,
+      "column": 20,
+      "stop_line": 112,
+      "stop_column": 43,
+      "path": "tests/unit/backend/native/models/classification/test_torchvision_model.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `bool` has no attribute `float`",
-      "concise_description": "Object of class `bool` has no attribute `float`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 41,
-      "column": 39,
-      "stop_line": 41,
-      "stop_column": 47,
-      "path": "src/otx/data/utils/utils.py",
+      "line": 113,
+      "column": 20,
+      "stop_line": 113,
+      "stop_column": 43,
+      "path": "tests/unit/backend/native/models/classification/test_torchvision_model.py",
       "code": -2,
-      "name": "not-a-type",
-      "description": "Expected a type form, got instance of `Overload[\n  [_ArrayType: ndarray[Any, Any]](object: _ArrayType, dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: Literal[True], ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> _ArrayType\n  [_ArrayType: ndarray[Any, Any]](object: numpy._core.multiarray._SupportsArray[_ArrayType], dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: Literal[True], ndmin: Literal[0] = ..., like: _SupportsArrayFunc | None = ...) -> _ArrayType\n  [_SCT: generic[Any]](object: _ArrayLike, dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[_SCT]]\n  (object: object, dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[Any]]\n  [_SCT: generic[Any]](object: Any, dtype: _DTypeLike, *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[_SCT]]\n  (object: Any, dtype: DTypeLike, *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[Any]]\n]`",
-      "concise_description": "Expected a type form, got instance of `Overload[\n  [_ArrayType: ndarray[Any, Any]](object: _ArrayType, dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: Literal[True], ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> _ArrayType\n  [_ArrayType: ndarray[Any, Any]](object: numpy._core.multiarray._SupportsArray[_ArrayType], dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: Literal[True], ndmin: Literal[0] = ..., like: _SupportsArrayFunc | None = ...) -> _ArrayType\n  [_SCT: generic[Any]](object: _ArrayLike, dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[_SCT]]\n  (object: object, dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[Any]]\n  [_SCT: generic[Any]](object: Any, dtype: _DTypeLike, *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[_SCT]]\n  (object: Any, dtype: DTypeLike, *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[Any]]\n]`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 74,
-      "column": 45,
-      "stop_line": 74,
-      "stop_column": 53,
-      "path": "src/otx/data/utils/utils.py",
+      "line": 13,
+      "column": 34,
+      "stop_line": 13,
+      "stop_column": 47,
+      "path": "tests/unit/backend/native/models/classification/utils/test_attention_with_peft.py",
       "code": -2,
-      "name": "not-a-type",
-      "description": "Expected a type form, got instance of `Overload[\n  [_ArrayType: ndarray[Any, Any]](object: _ArrayType, dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: Literal[True], ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> _ArrayType\n  [_ArrayType: ndarray[Any, Any]](object: numpy._core.multiarray._SupportsArray[_ArrayType], dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: Literal[True], ndmin: Literal[0] = ..., like: _SupportsArrayFunc | None = ...) -> _ArrayType\n  [_SCT: generic[Any]](object: _ArrayLike, dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[_SCT]]\n  (object: object, dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[Any]]\n  [_SCT: generic[Any]](object: Any, dtype: _DTypeLike, *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[_SCT]]\n  (object: Any, dtype: DTypeLike, *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[Any]]\n]`",
-      "concise_description": "Expected a type form, got instance of `Overload[\n  [_ArrayType: ndarray[Any, Any]](object: _ArrayType, dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: Literal[True], ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> _ArrayType\n  [_ArrayType: ndarray[Any, Any]](object: numpy._core.multiarray._SupportsArray[_ArrayType], dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: Literal[True], ndmin: Literal[0] = ..., like: _SupportsArrayFunc | None = ...) -> _ArrayType\n  [_SCT: generic[Any]](object: _ArrayLike, dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[_SCT]]\n  (object: object, dtype: None = ..., *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[Any]]\n  [_SCT: generic[Any]](object: Any, dtype: _DTypeLike, *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[_SCT]]\n  (object: Any, dtype: DTypeLike, *, copy: _CopyMode | bool | None = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., like: _SupportsArrayFunc | None = ...) -> ndarray[tuple[int, ...], dtype[Any]]\n]`",
+      "name": "bad-argument-type",
+      "description": "Argument `Linear` is not assignable to parameter `qkv` with type `Attention` in function `otx.backend.native.models.classification.utils.peft.AttentionWithLoRA.__init__`",
+      "concise_description": "Argument `Linear` is not assignable to parameter `qkv` with type `Attention` in function `otx.backend.native.models.classification.utils.peft.AttentionWithLoRA.__init__`",
       "severity": "error"
     },
     {
-      "line": 134,
-      "column": 25,
-      "stop_line": 134,
-      "stop_column": 35,
-      "path": "src/otx/data/utils/utils.py",
+      "line": 21,
+      "column": 34,
+      "stop_line": 21,
+      "stop_column": 47,
+      "path": "tests/unit/backend/native/models/classification/utils/test_attention_with_peft.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `media`",
-      "concise_description": "Object of class `NoneType` has no attribute `media`",
+      "name": "bad-argument-type",
+      "description": "Argument `Linear` is not assignable to parameter `qkv` with type `Attention` in function `otx.backend.native.models.classification.utils.peft.AttentionWithDoRA.__init__`",
+      "concise_description": "Argument `Linear` is not assignable to parameter `qkv` with type `Attention` in function `otx.backend.native.models.classification.utils.peft.AttentionWithDoRA.__init__`",
       "severity": "error"
     },
     {
-      "line": 146,
-      "column": 20,
-      "stop_line": 146,
-      "stop_column": 36,
-      "path": "src/otx/data/utils/utils.py",
+      "line": 12,
+      "column": 18,
+      "stop_line": 12,
+      "stop_column": 92,
+      "path": "tests/unit/backend/native/models/classification/utils/test_ignored_labels.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `annotations`",
-      "concise_description": "Object of class `NoneType` has no attribute `annotations`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ignored_labels=list[int], img_idx=Literal[0], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ignored_labels=list[int], img_idx=Literal[0], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])",
       "severity": "error"
     },
     {
-      "line": 149,
-      "column": 36,
-      "stop_line": 149,
-      "stop_column": 80,
-      "path": "src/otx/data/utils/utils.py",
+      "line": 13,
+      "column": 18,
+      "stop_line": 13,
+      "stop_column": 95,
+      "path": "tests/unit/backend/native/models/classification/utils/test_ignored_labels.py",
       "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Categories`\n  Object of class `Categories` has no attribute `__getitem__`",
-      "concise_description": "Cannot index into `Categories`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ignored_labels=list[int], img_idx=Literal[1], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ignored_labels=list[int], img_idx=Literal[1], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])",
       "severity": "error"
     },
     {
-      "line": 154,
-      "column": 29,
-      "stop_line": 154,
-      "stop_column": 36,
-      "path": "src/otx/data/utils/utils.py",
+      "line": 14,
+      "column": 18,
+      "stop_line": 14,
+      "stop_column": 89,
+      "path": "tests/unit/backend/native/models/classification/utils/test_ignored_labels.py",
       "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `dict[str, list[Unknown]]`\n  Argument `type[Polygon]` is not assignable to parameter `key` with type `str` in function `dict.__getitem__`",
-      "concise_description": "Cannot index into `dict[str, list[Unknown]]`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ignored_labels=list[int], img_idx=Literal[2], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ignored_labels=list[int], img_idx=Literal[2], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])",
       "severity": "error"
     },
     {
-      "line": 181,
-      "column": 99,
-      "stop_line": 181,
-      "stop_column": 114,
-      "path": "src/otx/data/utils/utils.py",
+      "line": 53,
+      "column": 33,
+      "stop_line": 53,
+      "stop_column": 36,
+      "path": "tests/unit/backend/native/models/common/backbones/test_pytorchcv_backbones.py",
       "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `dict[str, list[Unknown]]`\n  Argument `type[Bbox] | type[Polygon] | str | None` is not assignable to parameter `key` with type `str` in function `dict.__getitem__`",
-      "concise_description": "Cannot index into `dict[str, list[Unknown]]`",
+      "name": "bad-argument-type",
+      "description": "Argument `dict[str, str]` is not assignable to parameter `normalization` with type `(...) -> Module` in function `otx.backend.native.models.common.backbones.pytorchcv_backbones.replace_norm`",
+      "concise_description": "Argument `dict[str, str]` is not assignable to parameter `normalization` with type `(...) -> Module` in function `otx.backend.native.models.common.backbones.pytorchcv_backbones.replace_norm`",
       "severity": "error"
     },
     {
-      "line": 196,
-      "column": 26,
-      "stop_line": 196,
-      "stop_column": 41,
-      "path": "src/otx/data/utils/utils.py",
+      "line": 21,
+      "column": 24,
+      "stop_line": 21,
+      "stop_column": 43,
+      "path": "tests/unit/backend/native/models/common/test_iou2d_calculator.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Dataset` has no attribute `subsets`",
-      "concise_description": "Object of class `Dataset` has no attribute `subsets`",
+      "name": "bad-assignment",
+      "description": "`signedinteger[_64Bit]` is not assignable to variable `num_bbox` with type `int | None`",
+      "concise_description": "`signedinteger[_64Bit]` is not assignable to variable `num_bbox` with type `int | None`",
       "severity": "error"
     },
     {
-      "line": 196,
-      "column": 60,
-      "stop_line": 196,
-      "stop_column": 75,
-      "path": "src/otx/data/utils/utils.py",
+      "line": 22,
+      "column": 26,
+      "stop_line": 22,
+      "stop_column": 41,
+      "path": "tests/unit/backend/native/models/common/test_iou2d_calculator.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Dataset` has no attribute `subsets`",
-      "concise_description": "Object of class `Dataset` has no attribute `subsets`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C._VariableFunctions.rand` called with arguments: (tuple[int | None, Literal[2]])\n  Possible overloads:\n  (size: Sequence[SymInt | int], *, generator: Generator | None, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (*size: SymInt | int, *, generator: Generator | None, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (size: Sequence[SymInt | int], *, generator: Generator | None, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (*size: SymInt | int, *, generator: Generator | None, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (size: Sequence[SymInt | int], *, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor [closest match]\n  (*size: SymInt | int, *, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (size: Sequence[SymInt | int], *, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None 
= False, requires_grad: bool | None = False) -> Tensor\n  (*size: SymInt | int, *, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor",
+      "concise_description": "No matching overload found for function `torch._C._VariableFunctions.rand` called with arguments: (tuple[int | None, Literal[2]])",
       "severity": "error"
     },
     {
-      "line": 37,
-      "column": 38,
-      "stop_line": 37,
+      "line": 23,
+      "column": 36,
+      "stop_line": 23,
       "stop_column": 51,
-      "path": "src/otx/engine/__init__.py",
+      "path": "tests/unit/backend/native/models/common/test_iou2d_calculator.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `type[Engine]` is not assignable to parameter `object` with type `type[OTXEngine] | type[OVEngine]` in function `list.append`",
-      "concise_description": "Argument `type[Engine]` is not assignable to parameter `object` with type `type[OTXEngine] | type[OVEngine]` in function `list.append`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C._VariableFunctions.rand` called with arguments: (tuple[int | None, Literal[2]])\n  Possible overloads:\n  (size: Sequence[SymInt | int], *, generator: Generator | None, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (*size: SymInt | int, *, generator: Generator | None, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (size: Sequence[SymInt | int], *, generator: Generator | None, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (*size: SymInt | int, *, generator: Generator | None, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (size: Sequence[SymInt | int], *, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor [closest match]\n  (*size: SymInt | int, *, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (size: Sequence[SymInt | int], *, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None 
= False, requires_grad: bool | None = False) -> Tensor\n  (*size: SymInt | int, *, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor",
+      "concise_description": "No matching overload found for function `torch._C._VariableFunctions.rand` called with arguments: (tuple[int | None, Literal[2]])",
       "severity": "error"
     },
     {
-      "line": 60,
+      "line": 27,
       "column": 16,
-      "stop_line": 60,
+      "stop_line": 27,
       "stop_column": 32,
-      "path": "src/otx/metrics/accuracy.py",
+      "path": "tests/unit/backend/native/models/common/test_iou2d_calculator.py",
       "code": -2,
       "name": "bad-return",
-      "description": "Returned type `Metric` is not assignable to declared return type `NamedConfusionMatrix`",
-      "concise_description": "Returned type `Metric` is not assignable to declared return type `NamedConfusionMatrix`",
+      "description": "Returned type `tuple[Tensor, int | None]` is not assignable to declared return type `tuple[Tensor, int]`",
+      "concise_description": "Returned type `tuple[Tensor, int | None]` is not assignable to declared return type `tuple[Tensor, int]`",
       "severity": "error"
     },
     {
-      "line": 104,
+      "line": 35,
+      "column": 35,
+      "stop_line": 35,
+      "stop_column": 51,
+      "path": "tests/unit/backend/native/models/detection/backbones/test_presnet.py",
+      "code": -2,
+      "name": "bad-argument-type",
+      "description": "Argument `(normalization: ((...) -> Module) | Module | tuple[str, Module] | None, num_features: int, postfix: int | str = '', layer_name: str | None = None, requires_grad: bool = True, eps: float = ..., **kwargs: Unknown) -> tuple[str, Module]` is not assignable to parameter `func` with type `(...) -> Module` in function `functools.partial.__new__`",
+      "concise_description": "Argument `(normalization: ((...) -> Module) | Module | tuple[str, Module] | None, num_features: int, postfix: int | str = '', layer_name: str | None = None, requires_grad: bool = True, eps: float = ..., **kwargs: Unknown) -> tuple[str, Module]` is not assignable to parameter `func` with type `(...) -> Module` in function `functools.partial.__new__`",
+      "severity": "error"
+    },
+    {
+      "line": 467,
       "column": 9,
-      "stop_line": 104,
-      "stop_column": 15,
-      "path": "src/otx/metrics/accuracy.py",
+      "stop_line": 467,
+      "stop_column": 22,
+      "path": "tests/unit/backend/native/models/detection/backbones/test_vit_tiny.py",
       "code": -2,
-      "name": "bad-override",
-      "description": "Class member `AccuracywithLabelGroup.update` overrides parent class `Metric` in an inconsistent manner\n  `AccuracywithLabelGroup.update` has type `BoundMethod[AccuracywithLabelGroup, (self: AccuracywithLabelGroup, preds: Tensor, target: Tensor) -> None]`, which is not assignable to `BoundMethod[AccuracywithLabelGroup, (self: AccuracywithLabelGroup, *_: Any, **__: Any) -> None]`, the type of `Metric.update`",
-      "concise_description": "Class member `AccuracywithLabelGroup.update` overrides parent class `Metric` in an inconsistent manner",
+      "name": "missing-attribute",
+      "description": "Object of class `int` has no attribute `backward`",
+      "concise_description": "Object of class `int` has no attribute `backward`",
       "severity": "error"
     },
     {
-      "line": 131,
-      "column": 67,
-      "stop_line": 131,
-      "stop_column": 80,
-      "path": "src/otx/metrics/accuracy.py",
+      "line": 479,
+      "column": 9,
+      "stop_line": 479,
+      "stop_column": 22,
+      "path": "tests/unit/backend/native/models/detection/backbones/test_vit_tiny.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[NamedConfusionMatrix]` is not assignable to parameter `conf_matrices` with type `Tensor` in function `AccuracywithLabelGroup._compute_accuracy_from_conf_matrices`",
-      "concise_description": "Argument `list[NamedConfusionMatrix]` is not assignable to parameter `conf_matrices` with type `Tensor` in function `AccuracywithLabelGroup._compute_accuracy_from_conf_matrices`",
+      "name": "missing-attribute",
+      "description": "Object of class `int` has no attribute `backward`",
+      "concise_description": "Object of class `int` has no attribute `backward`",
       "severity": "error"
     },
     {
-      "line": 284,
-      "column": 63,
-      "stop_line": 290,
+      "line": 20,
+      "column": 33,
+      "stop_line": 24,
       "stop_column": 10,
-      "path": "src/otx/metrics/accuracy.py",
+      "path": "tests/unit/backend/native/models/detection/conftest.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`list[Metric]` is not assignable to attribute `multiclass_head_accuracy` with type `list[Accuracy]`",
-      "concise_description": "`list[Metric]` is not assignable to attribute `multiclass_head_accuracy` with type `list[Accuracy]`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])",
       "severity": "error"
     },
     {
-      "line": 306,
-      "column": 41,
-      "stop_line": 312,
+      "line": 25,
+      "column": 33,
+      "stop_line": 29,
       "stop_column": 10,
-      "path": "src/otx/metrics/accuracy.py",
+      "path": "tests/unit/backend/native/models/detection/conftest.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`list[Module]` is not assignable to attribute `multiclass_head_accuracy` with type `list[Accuracy]`",
-      "concise_description": "`list[Module]` is not assignable to attribute `multiclass_head_accuracy` with type `list[Accuracy]`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])",
       "severity": "error"
     },
     {
-      "line": 317,
-      "column": 9,
-      "stop_line": 317,
-      "stop_column": 15,
-      "path": "src/otx/metrics/accuracy.py",
-      "code": -2,
-      "name": "bad-override",
-      "description": "Class member `MixedHLabelAccuracy.update` overrides parent class `Metric` in an inconsistent manner\n  `MixedHLabelAccuracy.update` has type `BoundMethod[MixedHLabelAccuracy, (self: MixedHLabelAccuracy, preds: Tensor, target: Tensor) -> None]`, which is not assignable to `BoundMethod[MixedHLabelAccuracy, (self: MixedHLabelAccuracy, *_: Any, **__: Any) -> None]`, the type of `Metric.update`",
-      "concise_description": "Class member `MixedHLabelAccuracy.update` overrides parent class `Metric` in an inconsistent manner",
+      "line": 38,
+      "column": 18,
+      "stop_line": 38,
+      "stop_column": 73,
+      "path": "tests/unit/backend/native/models/detection/conftest.py",
+      "code": -2,
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])",
       "severity": "error"
     },
     {
-      "line": 343,
-      "column": 17,
-      "stop_line": 343,
+      "line": 39,
+      "column": 18,
+      "stop_line": 39,
       "stop_column": 73,
-      "path": "src/otx/metrics/accuracy.py",
+      "path": "tests/unit/backend/native/models/detection/conftest.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[None]` is not assignable to parameter `tensors` with type `list[Tensor] | tuple[Tensor, ...] | None` in function `torch._C._VariableFunctions.stack`",
-      "concise_description": "Argument `list[None]` is not assignable to parameter `tensors` with type `list[Tensor] | tuple[Tensor, ...] | None` in function `torch._C._VariableFunctions.stack`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[1], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[1], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])",
       "severity": "error"
     },
     {
-      "line": 678,
-      "column": 9,
-      "stop_line": 678,
-      "stop_column": 15,
-      "path": "src/otx/metrics/fmeasure.py",
+      "line": 84,
+      "column": 37,
+      "stop_line": 84,
+      "stop_column": 59,
+      "path": "tests/unit/backend/native/models/detection/detectors/test_detr.py",
       "code": -2,
-      "name": "bad-override",
-      "description": "Class member `FMeasure.update` overrides parent class `Metric` in an inconsistent manner\n  `FMeasure.update` has type `BoundMethod[FMeasure, (self: FMeasure, preds: list[dict[str, Tensor]], target: list[dict[str, Tensor]]) -> None]`, which is not assignable to `BoundMethod[FMeasure, (self: FMeasure, *_: Any, **__: Any) -> None]`, the type of `Metric.update`",
-      "concise_description": "Class member `FMeasure.update` overrides parent class `Metric` in an inconsistent manner",
+      "name": "implicit-import",
+      "description": "Module `torchvision.tv_tensors` exists, but was not imported explicitly. You are relying on other modules to load it.",
+      "concise_description": "Module `torchvision.tv_tensors` exists, but was not imported explicitly. You are relying on other modules to load it.",
       "severity": "error"
     },
     {
-      "line": 811,
-      "column": 38,
-      "stop_line": 811,
-      "stop_column": 48,
-      "path": "src/otx/metrics/fmeasure.py",
+      "line": 53,
+      "column": 23,
+      "stop_line": 53,
+      "stop_column": 83,
+      "path": "tests/unit/backend/native/models/detection/detectors/test_single_stage_detector.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `box_format` with type `Literal['cxcywh', 'xywh', 'xyxy']` in function `torchmetrics.detection.mean_ap.MeanAveragePrecision.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `box_format` with type `Literal['cxcywh', 'xywh', 'xyxy']` in function `torchmetrics.detection.mean_ap.MeanAveragePrecision.__init__`",
+      "description": "Argument `ImageInfo` is not assignable to parameter `imgs_info` with type `Sequence[ImageInfo | None] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
+      "concise_description": "Argument `ImageInfo` is not assignable to parameter `imgs_info` with type `Sequence[ImageInfo | None] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 811,
-      "column": 50,
-      "stop_line": 811,
-      "stop_column": 58,
-      "path": "src/otx/metrics/fmeasure.py",
+      "line": 53,
+      "column": 32,
+      "stop_line": 53,
+      "stop_column": 83,
+      "path": "tests/unit/backend/native/models/detection/detectors/test_single_stage_detector.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `iou_type` with type `Literal['bbox', 'segm'] | tuple[str]` in function `torchmetrics.detection.mean_ap.MeanAveragePrecision.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `iou_type` with type `Literal['bbox', 'segm'] | tuple[str]` in function `torchmetrics.detection.mean_ap.MeanAveragePrecision.__init__`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])",
       "severity": "error"
     },
     {
-      "line": 79,
-      "column": 5,
-      "stop_line": 79,
-      "stop_column": 22,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 55,
+      "column": 20,
+      "stop_line": 55,
+      "stop_column": 58,
+      "path": "tests/unit/backend/native/models/detection/detectors/test_single_stage_detector.py",
       "code": -2,
-      "name": "bad-override",
-      "description": "Class member `LeafAccuracy.full_state_update` overrides parent class `Metric` in an inconsistent manner\n  `LeafAccuracy.full_state_update` has type `bool`, which is not consistent with `bool | None` in `Metric.full_state_update` (the type of read-write attributes cannot be changed)",
-      "concise_description": "Class member `LeafAccuracy.full_state_update` overrides parent class `Metric` in an inconsistent manner",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 105,
-      "column": 13,
-      "stop_line": 105,
-      "stop_column": 38,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 28,
+      "column": 24,
+      "stop_line": 31,
+      "stop_column": 14,
+      "path": "tests/unit/backend/native/models/detection/heads/test_class_incremental_mixin.py",
       "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
+      "name": "bad-argument-type",
+      "description": "Argument `DeltaXYWHBBoxCoder` is not assignable to parameter `bbox_coder` with type `Module` in function `otx.backend.native.models.detection.losses.atss_loss.ATSSCriterion.__init__`",
+      "concise_description": "Argument `DeltaXYWHBBoxCoder` is not assignable to parameter `bbox_coder` with type `Module` in function `otx.backend.native.models.detection.losses.atss_loss.ATSSCriterion.__init__`",
       "severity": "error"
     },
     {
-      "line": 105,
-      "column": 13,
-      "stop_line": 105,
-      "stop_column": 38,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 41,
+      "column": 30,
+      "stop_line": 51,
+      "stop_column": 14,
+      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "Cannot set item in `Module`\n  Expected `__setitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot set item in `Module`",
+      "name": "bad-argument-type",
+      "description": "Argument `SSDAnchorGeneratorClustered` is not assignable to parameter `anchor_generator` with type `Module` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
+      "concise_description": "Argument `SSDAnchorGeneratorClustered` is not assignable to parameter `anchor_generator` with type `Module` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
       "severity": "error"
     },
     {
-      "line": 106,
-      "column": 13,
-      "stop_line": 106,
-      "stop_column": 40,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 52,
+      "column": 24,
+      "stop_line": 55,
+      "stop_column": 14,
+      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
       "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
+      "name": "bad-argument-type",
+      "description": "Argument `DeltaXYWHBBoxCoder` is not assignable to parameter `bbox_coder` with type `Module` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
+      "concise_description": "Argument `DeltaXYWHBBoxCoder` is not assignable to parameter `bbox_coder` with type `Module` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
       "severity": "error"
     },
     {
-      "line": 106,
-      "column": 13,
-      "stop_line": 106,
-      "stop_column": 40,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 60,
+      "column": 23,
+      "stop_line": 60,
+      "stop_column": 32,
+      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "Cannot set item in `Module`\n  Expected `__setitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot set item in `Module`",
+      "name": "bad-argument-type",
+      "description": "Argument `DictConfig` is not assignable to parameter `train_cfg` with type `dict[Unknown, Unknown]` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
+      "concise_description": "Argument `DictConfig` is not assignable to parameter `train_cfg` with type `dict[Unknown, Unknown]` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
       "severity": "error"
     },
     {
-      "line": 110,
-      "column": 17,
-      "stop_line": 110,
-      "stop_column": 48,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 61,
+      "column": 22,
+      "stop_line": 61,
+      "stop_column": 30,
+      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
       "code": -2,
-      "name": "not-callable",
-      "description": "Expected a callable, got `Tensor`",
-      "concise_description": "Expected a callable, got `Tensor`",
+      "name": "bad-argument-type",
+      "description": "Argument `DictConfig` is not assignable to parameter `test_cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
+      "concise_description": "Argument `DictConfig` is not assignable to parameter `test_cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
       "severity": "error"
     },
     {
-      "line": 111,
-      "column": 25,
-      "stop_line": 111,
-      "stop_column": 71,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 71,
+      "column": 16,
+      "stop_line": 71,
+      "stop_column": 48,
+      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`/` is not supported between `Module` and `Tensor`\n  Expected `__truediv__` to be a callable, got `Module | Tensor`",
-      "concise_description": "`/` is not supported between `Module` and `Tensor`",
+      "name": "missing-attribute",
+      "description": "Object of class `BasePriorGenerator` has no attribute `widths`",
+      "concise_description": "Object of class `BasePriorGenerator` has no attribute `widths`",
       "severity": "error"
     },
     {
-      "line": 118,
-      "column": 5,
-      "stop_line": 118,
-      "stop_column": 22,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 75,
+      "column": 16,
+      "stop_line": 75,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
       "code": -2,
-      "name": "bad-override",
-      "description": "Class member `FullPathAccuracy.full_state_update` overrides parent class `Metric` in an inconsistent manner\n  `FullPathAccuracy.full_state_update` has type `bool`, which is not consistent with `bool | None` in `Metric.full_state_update` (the type of read-write attributes cannot be changed)",
-      "concise_description": "Class member `FullPathAccuracy.full_state_update` overrides parent class `Metric` in an inconsistent manner",
+      "name": "missing-attribute",
+      "description": "Object of class `BasePriorGenerator` has no attribute `heights`",
+      "concise_description": "Object of class `BasePriorGenerator` has no attribute `heights`",
       "severity": "error"
     },
     {
-      "line": 130,
-      "column": 9,
-      "stop_line": 130,
-      "stop_column": 38,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 80,
+      "column": 16,
+      "stop_line": 80,
+      "stop_column": 42,
+      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`+=` is not supported between `Module` and `Tensor`\n  Expected `__iadd__` to be a callable, got `Module | Tensor`",
-      "concise_description": "`+=` is not supported between `Module` and `Tensor`",
+      "name": "missing-attribute",
+      "description": "Object of class `BaseBBoxCoder` has no attribute `means`",
+      "concise_description": "Object of class `BaseBBoxCoder` has no attribute `means`",
       "severity": "error"
     },
     {
-      "line": 131,
-      "column": 9,
-      "stop_line": 131,
-      "stop_column": 36,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 81,
+      "column": 16,
+      "stop_line": 81,
+      "stop_column": 41,
+      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`+=` is not supported between `Module` and `int`\n  Expected `__iadd__` to be a callable, got `Module | Tensor`",
-      "concise_description": "`+=` is not supported between `Module` and `int`",
+      "name": "missing-attribute",
+      "description": "Object of class `BaseBBoxCoder` has no attribute `stds`",
+      "concise_description": "Object of class `BaseBBoxCoder` has no attribute `stds`",
       "severity": "error"
     },
     {
-      "line": 135,
-      "column": 16,
-      "stop_line": 135,
-      "stop_column": 70,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 28,
+      "column": 109,
+      "stop_line": 28,
+      "stop_column": 117,
+      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`/` is not supported between `Module` and `Tensor`\n  Expected `__truediv__` to be a callable, got `Module | Tensor`",
-      "concise_description": "`/` is not supported between `Module` and `Tensor`",
+      "name": "bad-argument-type",
+      "description": "Argument `DictConfig` is not assignable to parameter `test_cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.__init__`",
+      "concise_description": "Argument `DictConfig` is not assignable to parameter `test_cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.__init__`",
       "severity": "error"
     },
     {
-      "line": 135,
-      "column": 39,
-      "stop_line": 135,
-      "stop_column": 59,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 30,
+      "column": 61,
+      "stop_line": 30,
+      "stop_column": 65,
+      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
       "code": -2,
-      "name": "not-callable",
-      "description": "Expected a callable, got `Tensor`",
-      "concise_description": "Expected a callable, got `Tensor`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.forward`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.forward`",
       "severity": "error"
     },
     {
-      "line": 141,
-      "column": 5,
-      "stop_line": 141,
-      "stop_column": 22,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 31,
+      "column": 83,
+      "stop_line": 31,
+      "stop_column": 91,
+      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
       "code": -2,
-      "name": "bad-override",
-      "description": "Class member `InconsistentPathRatio.full_state_update` overrides parent class `Metric` in an inconsistent manner\n  `InconsistentPathRatio.full_state_update` has type `bool`, which is not consistent with `bool | None` in `Metric.full_state_update` (the type of read-write attributes cannot be changed)",
-      "concise_description": "Class member `InconsistentPathRatio.full_state_update` overrides parent class `Metric` in an inconsistent manner",
+      "name": "bad-argument-type",
+      "description": "Argument `DictConfig` is not assignable to parameter `cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.predict_by_feat`",
+      "concise_description": "Argument `DictConfig` is not assignable to parameter `cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.predict_by_feat`",
       "severity": "error"
     },
     {
-      "line": 164,
+      "line": 37,
       "column": 17,
-      "stop_line": 164,
-      "stop_column": 34,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "stop_line": 37,
+      "stop_column": 25,
+      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`+=` is not supported between `Module` and `Literal[1]`\n  Expected `__iadd__` to be a callable, got `Module | Tensor`",
-      "concise_description": "`+=` is not supported between `Module` and `Literal[1]`",
+      "name": "bad-argument-type",
+      "description": "Argument `DictConfig` is not assignable to parameter `cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.predict_by_feat`",
+      "concise_description": "Argument `DictConfig` is not assignable to parameter `cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.predict_by_feat`",
       "severity": "error"
     },
     {
-      "line": 165,
-      "column": 9,
-      "stop_line": 165,
-      "stop_column": 24,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 55,
+      "column": 27,
+      "stop_line": 55,
+      "stop_column": 59,
+      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`+=` is not supported between `Module` and `int`\n  Expected `__iadd__` to be a callable, got `Module | Tensor`",
-      "concise_description": "`+=` is not supported between `Module` and `int`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 169,
-      "column": 16,
-      "stop_line": 169,
-      "stop_column": 70,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 66,
+      "column": 47,
+      "stop_line": 66,
+      "stop_column": 51,
+      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`/` is not supported between `Module` and `Tensor`\n  Expected `__truediv__` to be a callable, got `Module | Tensor`",
-      "concise_description": "`/` is not supported between `Module` and `Tensor`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
       "severity": "error"
     },
     {
-      "line": 169,
-      "column": 39,
-      "stop_line": 169,
+      "line": 85,
+      "column": 27,
+      "stop_line": 85,
       "stop_column": 59,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
       "code": -2,
-      "name": "not-callable",
-      "description": "Expected a callable, got `Tensor`",
-      "concise_description": "Expected a callable, got `Tensor`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 179,
-      "column": 5,
-      "stop_line": 179,
-      "stop_column": 22,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 39,
+      "column": 16,
+      "stop_line": 88,
+      "stop_column": 10,
+      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
       "code": -2,
-      "name": "bad-override",
-      "description": "Class member `WeightedHierarchicalPrecision.full_state_update` overrides parent class `Metric` in an inconsistent manner\n  `WeightedHierarchicalPrecision.full_state_update` has type `bool`, which is not consistent with `bool | None` in `Metric.full_state_update` (the type of read-write attributes cannot be changed)",
-      "concise_description": "Class member `WeightedHierarchicalPrecision.full_state_update` overrides parent class `Metric` in an inconsistent manner",
+      "name": "bad-return",
+      "description": "Returned type `dict[str, Tensor | dict[str, Tensor] | list[@_]]` is not assignable to declared return type `dict[str, Tensor]`",
+      "concise_description": "Returned type `dict[str, Tensor | dict[str, Tensor] | list[@_]]` is not assignable to declared return type `dict[str, Tensor]`",
       "severity": "error"
     },
     {
-      "line": 197,
-      "column": 13,
-      "stop_line": 197,
-      "stop_column": 26,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 129,
+      "column": 65,
+      "stop_line": 129,
+      "stop_column": 72,
+      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
       "code": -2,
-      "name": "not-callable",
-      "description": "Expected a callable, got `Tensor`",
-      "concise_description": "Expected a callable, got `Tensor`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.deim_loss.DEIMCriterion.loss_labels_mal`",
+      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.deim_loss.DEIMCriterion.loss_labels_mal`",
       "severity": "error"
     },
     {
-      "line": 203,
-      "column": 22,
-      "stop_line": 203,
-      "stop_column": 36,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 140,
+      "column": 65,
+      "stop_line": 140,
+      "stop_column": 72,
+      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
       "code": -2,
-      "name": "not-callable",
-      "description": "Expected a callable, got `Tensor`",
-      "concise_description": "Expected a callable, got `Tensor`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_labels_vfl`",
+      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_labels_vfl`",
       "severity": "error"
     },
     {
-      "line": 209,
-      "column": 13,
-      "stop_line": 209,
-      "stop_column": 25,
-      "path": "src/otx/metrics/hier_metric_collection.py",
+      "line": 151,
+      "column": 60,
+      "stop_line": 151,
+      "stop_column": 67,
+      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
       "code": -2,
-      "name": "not-callable",
-      "description": "Expected a callable, got `Tensor`",
-      "concise_description": "Expected a callable, got `Tensor`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_boxes`",
+      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_boxes`",
       "severity": "error"
     },
     {
-      "line": 76,
-      "column": 28,
-      "stop_line": 76,
-      "stop_column": 33,
-      "path": "src/otx/metrics/mean_ap.py",
+      "line": 163,
+      "column": 60,
+      "stop_line": 163,
+      "stop_column": 67,
+      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`masks` may be uninitialized",
-      "concise_description": "`masks` may be uninitialized",
+      "name": "bad-argument-type",
+      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_local`",
+      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_local`",
       "severity": "error"
     },
     {
-      "line": 98,
-      "column": 45,
-      "stop_line": 98,
-      "stop_column": 55,
-      "path": "src/otx/metrics/mean_ap.py",
+      "line": 215,
+      "column": 64,
+      "stop_line": 215,
+      "stop_column": 71,
+      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `box_format` with type `Literal['cxcywh', 'xywh', 'xyxy']` in function `torchmetrics.detection.mean_ap.MeanAveragePrecision.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `box_format` with type `Literal['cxcywh', 'xywh', 'xyxy']` in function `torchmetrics.detection.mean_ap.MeanAveragePrecision.__init__`",
+      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.deim_loss.DEIMCriterion.loss_labels_mal`",
+      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.deim_loss.DEIMCriterion.loss_labels_mal`",
       "severity": "error"
     },
     {
-      "line": 98,
-      "column": 57,
-      "stop_line": 98,
-      "stop_column": 65,
-      "path": "src/otx/metrics/mean_ap.py",
+      "line": 218,
+      "column": 64,
+      "stop_line": 218,
+      "stop_column": 71,
+      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `iou_type` with type `Literal['bbox', 'segm'] | tuple[str]` in function `torchmetrics.detection.mean_ap.MeanAveragePrecision.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `iou_type` with type `Literal['bbox', 'segm'] | tuple[str]` in function `torchmetrics.detection.mean_ap.MeanAveragePrecision.__init__`",
+      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_labels_vfl`",
+      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_labels_vfl`",
       "severity": "error"
     },
     {
-      "line": 45,
-      "column": 9,
-      "stop_line": 45,
-      "stop_column": 15,
-      "path": "src/otx/metrics/mlc_map.py",
+      "line": 37,
+      "column": 45,
+      "stop_line": 37,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/losses/test_yolox_loss.py",
       "code": -2,
-      "name": "bad-override",
-      "description": "Class member `MultilabelmAP.update` overrides parent class `Metric` in an inconsistent manner\n  `MultilabelmAP.update` has type `BoundMethod[MultilabelmAP, (self: MultilabelmAP, preds: Tensor, target: Tensor) -> None]`, which is not assignable to `BoundMethod[MultilabelmAP, (self: MultilabelmAP, *_: Any, **__: Any) -> None]`, the type of `Metric.update`",
-      "concise_description": "Class member `MultilabelmAP.update` overrides parent class `Metric` in an inconsistent manner",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
       "severity": "error"
     },
     {
-      "line": 164,
-      "column": 9,
-      "stop_line": 164,
-      "stop_column": 15,
-      "path": "src/otx/metrics/pck.py",
+      "line": 63,
+      "column": 45,
+      "stop_line": 63,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/losses/test_yolox_loss.py",
       "code": -2,
-      "name": "bad-override",
-      "description": "Class member `PCKMeasure.update` overrides parent class `Metric` in an inconsistent manner\n  `PCKMeasure.update` has type `BoundMethod[PCKMeasure, (self: PCKMeasure, preds: list[dict[str, Tensor]], target: list[dict[str, Tensor]]) -> None]`, which is not assignable to `BoundMethod[PCKMeasure, (self: PCKMeasure, *_: Any, **__: Any) -> None]`, the type of `Metric.update`",
-      "concise_description": "Class member `PCKMeasure.update` overrides parent class `Metric` in an inconsistent manner",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
       "severity": "error"
     },
     {
-      "line": 168,
-      "column": 17,
-      "stop_line": 170,
-      "stop_column": 18,
-      "path": "src/otx/metrics/pck.py",
+      "line": 84,
+      "column": 45,
+      "stop_line": 84,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/losses/test_yolox_loss.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `iterable` with type `Iterable[ndarray[Unknown, Unknown]]` in function `list.extend`",
-      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `iterable` with type `Iterable[ndarray[Unknown, Unknown]]` in function `list.extend`",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
       "severity": "error"
     },
     {
-      "line": 173,
-      "column": 17,
-      "stop_line": 175,
-      "stop_column": 18,
-      "path": "src/otx/metrics/pck.py",
+      "line": 328,
+      "column": 9,
+      "stop_line": 328,
+      "stop_column": 22,
+      "path": "tests/unit/backend/native/models/detection/necks/test_hybrid_encoder.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `iterable` with type `Iterable[ndarray[Unknown, Unknown]]` in function `list.extend`",
-      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `iterable` with type `Iterable[ndarray[Unknown, Unknown]]` in function `list.extend`",
+      "name": "missing-attribute",
+      "description": "Object of class `int` has no attribute `backward`",
+      "concise_description": "Object of class `int` has no attribute `backward`",
       "severity": "error"
     },
     {
-      "line": 274,
-      "column": 56,
-      "stop_line": 274,
-      "stop_column": 66,
-      "path": "src/otx/tools/auto_configurator.py",
+      "line": 117,
+      "column": 21,
+      "stop_line": 117,
+      "stop_column": 22,
+      "path": "tests/unit/backend/native/models/detection/test_atss.py",
       "code": -2,
-      "name": "unbound-name",
-      "description": "`datamodule` may be uninitialized",
-      "concise_description": "`datamodule` may be uninitialized",
+      "name": "bad-argument-type",
+      "description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
+      "concise_description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
       "severity": "error"
     },
     {
-      "line": 336,
-      "column": 9,
-      "stop_line": 336,
-      "stop_column": 18,
-      "path": "src/otx/types/label.py",
-      "code": -2,
-      "name": "bad-param-name-override",
-      "description": "Class member `NullLabelInfo.from_json` overrides parent class `LabelInfo` in an inconsistent manner\n  Got parameter name `_`, expected `serialized`",
-      "concise_description": "Class member `NullLabelInfo.from_json` overrides parent class `LabelInfo` in an inconsistent manner",
-      "severity": "error"
-    },
-    {
-      "line": 60,
-      "column": 55,
-      "stop_line": 60,
-      "stop_column": 76,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `(values: list[Any], context: dict[str, Any]) -> MockSample` is not assignable to parameter `unflatten_fn` with type `(Iterable[Any], Any) -> Any` in function `torch.utils._pytree.register_pytree_node`",
-      "concise_description": "Argument `(values: list[Any], context: dict[str, Any]) -> MockSample` is not assignable to parameter `unflatten_fn` with type `(Iterable[Any], Any) -> Any` in function `torch.utils._pytree.register_pytree_node`",
-      "severity": "error"
-    },
-    {
-      "line": 104,
-      "column": 30,
-      "stop_line": 104,
-      "stop_column": 36,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "not-a-type",
-      "description": "Expected a type form, got instance of `Module[pytest]`",
-      "concise_description": "Expected a type form, got instance of `Module[pytest]`",
-      "severity": "error"
-    },
-    {
-      "line": 233,
-      "column": 32,
-      "stop_line": 233,
-      "stop_column": 83,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])",
-      "severity": "error"
-    },
-    {
-      "line": 257,
-      "column": 32,
-      "stop_line": 257,
-      "stop_column": 83,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])",
-      "severity": "error"
-    },
-    {
-      "line": 281,
-      "column": 32,
-      "stop_line": 281,
-      "stop_column": 83,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])",
-      "severity": "error"
-    },
-    {
-      "line": 298,
-      "column": 12,
-      "stop_line": 298,
-      "stop_column": 73,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[MockSample, OTXSampleBatch, OTXPredictionBatch]`",
-      "concise_description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[MockSample, OTXSampleBatch, OTXPredictionBatch]`",
-      "severity": "error"
-    },
-    {
-      "line": 305,
-      "column": 32,
-      "stop_line": 305,
-      "stop_column": 83,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])",
-      "severity": "error"
-    },
-    {
-      "line": 306,
-      "column": 43,
-      "stop_line": 306,
-      "stop_column": 113,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xyxy'], canvas_size=tuple[Literal[10], Literal[10]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xyxy'], canvas_size=tuple[Literal[10], Literal[10]])",
-      "severity": "error"
-    },
-    {
-      "line": 329,
-      "column": 12,
-      "stop_line": 329,
-      "stop_column": 73,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[tuple[Unknown, ...], MockSample, OTXSampleBatch]`",
-      "concise_description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[tuple[Unknown, ...], MockSample, OTXSampleBatch]`",
-      "severity": "error"
-    },
-    {
-      "line": 336,
-      "column": 32,
-      "stop_line": 336,
-      "stop_column": 83,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[64], Literal[64]], ori_shape=tuple[Literal[64], Literal[64]])",
-      "severity": "error"
-    },
-    {
-      "line": 337,
-      "column": 43,
-      "stop_line": 337,
-      "stop_column": 113,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xyxy'], canvas_size=tuple[Literal[10], Literal[10]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xyxy'], canvas_size=tuple[Literal[10], Literal[10]])",
-      "severity": "error"
-    },
-    {
-      "line": 350,
-      "column": 22,
-      "stop_line": 350,
-      "stop_column": 39,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)",
-      "severity": "error"
-    },
-    {
-      "line": 357,
-      "column": 22,
-      "stop_line": 357,
-      "stop_column": 39,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)",
-      "severity": "error"
-    },
-    {
-      "line": 364,
-      "column": 12,
-      "stop_line": 364,
-      "stop_column": 73,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[tuple[Unknown, ...], MockSample, OTXSampleBatch]`",
-      "concise_description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[tuple[Unknown, ...], MockSample, OTXSampleBatch]`",
-      "severity": "error"
-    },
-    {
-      "line": 371,
-      "column": 32,
-      "stop_line": 371,
-      "stop_column": 83,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])",
-      "severity": "error"
-    },
-    {
-      "line": 380,
-      "column": 22,
-      "stop_line": 380,
-      "stop_column": 57,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)",
-      "severity": "error"
-    },
-    {
-      "line": 385,
-      "column": 22,
-      "stop_line": 385,
-      "stop_column": 57,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)",
-      "severity": "error"
-    },
-    {
-      "line": 391,
-      "column": 12,
-      "stop_line": 391,
-      "stop_column": 73,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[tuple[Unknown, ...], MockSample, OTXSampleBatch]`",
-      "concise_description": "Returned type `tuple[MockSample, OTXPredictionBatch, OTXSampleBatch]` is not assignable to declared return type `tuple[tuple[Unknown, ...], MockSample, OTXSampleBatch]`",
-      "severity": "error"
-    },
-    {
-      "line": 489,
-      "column": 20,
-      "stop_line": 496,
-      "stop_column": 10,
-      "path": "tests/conftest.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
-      "concise_description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 67,
-      "column": 37,
-      "stop_line": 67,
-      "stop_column": 55,
-      "path": "tests/integration/api/test_augmentation.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "concise_description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "severity": "error"
-    },
-    {
-      "line": 68,
-      "column": 37,
-      "stop_line": 68,
-      "stop_column": 55,
-      "path": "tests/integration/api/test_augmentation.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "concise_description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "severity": "error"
-    },
-    {
-      "line": 69,
-      "column": 42,
-      "stop_line": 69,
-      "stop_column": 60,
-      "path": "tests/integration/api/test_augmentation.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "concise_description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "severity": "error"
-    },
-    {
-      "line": 70,
-      "column": 41,
-      "stop_line": 70,
-      "stop_column": 59,
-      "path": "tests/integration/api/test_augmentation.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "concise_description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "severity": "error"
-    },
-    {
-      "line": 71,
-      "column": 40,
-      "stop_line": 71,
-      "stop_column": 58,
-      "path": "tests/integration/api/test_augmentation.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "concise_description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "severity": "error"
-    },
-    {
-      "line": 21,
-      "column": 36,
-      "stop_line": 21,
-      "stop_column": 54,
-      "path": "tests/integration/api/test_engine_api.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "concise_description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "severity": "error"
-    },
-    {
-      "line": 76,
-      "column": 15,
-      "stop_line": 76,
-      "stop_column": 34,
-      "path": "tests/integration/api/test_engine_api.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Path | dict[str, Path]` is not assignable to parameter `model` with type `OTXModel | OVModel | Path | PathLike[Unknown] | str` in function `otx.engine.create_engine`",
-      "concise_description": "Argument `Path | dict[str, Path]` is not assignable to parameter `model` with type `OTXModel | OVModel | Path | PathLike[Unknown] | str` in function `otx.engine.create_engine`",
-      "severity": "error"
-    },
-    {
-      "line": 104,
-      "column": 32,
-      "stop_line": 104,
-      "stop_column": 59,
-      "path": "tests/integration/api/test_engine_api.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OTXSample` has no attribute `saliency_map`",
-      "concise_description": "Object of class `OTXSample` has no attribute `saliency_map`",
-      "severity": "error"
-    },
-    {
-      "line": 108,
-      "column": 36,
-      "stop_line": 108,
-      "stop_column": 59,
-      "path": "tests/integration/api/test_engine_api.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "No attribute `TILE_RECIPE_LIST` in module `pytest`",
-      "concise_description": "No attribute `TILE_RECIPE_LIST` in module `pytest`",
-      "severity": "error"
-    },
-    {
-      "line": 128,
-      "column": 35,
-      "stop_line": 128,
-      "stop_column": 41,
-      "path": "tests/integration/api/test_engine_api.py",
-      "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `format` in function `datumaro.util.real_decorator`",
-      "concise_description": "Unexpected keyword argument `format` in function `datumaro.util.real_decorator`",
-      "severity": "error"
-    },
-    {
-      "line": 128,
-      "column": 78,
-      "stop_line": 128,
-      "stop_column": 88,
-      "path": "tests/integration/api/test_engine_api.py",
-      "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `save_media` in function `datumaro.util.real_decorator`",
-      "concise_description": "Unexpected keyword argument `save_media` in function `datumaro.util.real_decorator`",
-      "severity": "error"
-    },
-    {
-      "line": 151,
-      "column": 58,
-      "stop_line": 151,
-      "stop_column": 82,
-      "path": "tests/integration/api/test_engine_api.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tiler` has no attribute `tile_size`",
-      "concise_description": "Object of class `Tiler` has no attribute `tile_size`",
-      "severity": "error"
-    },
-    {
-      "line": 152,
-      "column": 53,
-      "stop_line": 152,
-      "stop_column": 81,
-      "path": "tests/integration/api/test_engine_api.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tiler` has no attribute `tiles_overlap`",
-      "concise_description": "Object of class `Tiler` has no attribute `tiles_overlap`",
-      "severity": "error"
-    },
-    {
-      "line": 79,
-      "column": 16,
-      "stop_line": 83,
-      "stop_column": 10,
-      "path": "tests/integration/api/test_geti_interaction.py",
-      "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `tuple[Engine, dict[str, Any]]` is not assignable to declared return type `tuple[OTXEngine, dict[str, Any]]`",
-      "concise_description": "Returned type `tuple[Engine, dict[str, Any]]` is not assignable to declared return type `tuple[OTXEngine, dict[str, Any]]`",
-      "severity": "error"
-    },
-    {
-      "line": 139,
-      "column": 45,
-      "stop_line": 139,
-      "stop_column": 54,
-      "path": "tests/integration/api/test_geti_interaction.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Path` is not assignable to parameter `model` with type `InferenceAdapter | str` in function `model_api.models.model.Model.create_model`",
-      "concise_description": "Argument `Path` is not assignable to parameter `model` with type `InferenceAdapter | str` in function `model_api.models.model.Model.create_model`",
-      "severity": "error"
-    },
-    {
-      "line": 162,
-      "column": 45,
-      "stop_line": 162,
-      "stop_column": 58,
-      "path": "tests/integration/api/test_geti_interaction.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Path` is not assignable to parameter `model` with type `InferenceAdapter | str` in function `model_api.models.model.Model.create_model`",
-      "concise_description": "Argument `Path` is not assignable to parameter `model` with type `InferenceAdapter | str` in function `model_api.models.model.Model.create_model`",
-      "severity": "error"
-    },
-    {
-      "line": 185,
-      "column": 26,
-      "stop_line": 185,
-      "stop_column": 44,
-      "path": "tests/integration/api/test_geti_interaction.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Engine` has no attribute `optimize`",
-      "concise_description": "Object of class `Engine` has no attribute `optimize`",
-      "severity": "error"
-    },
-    {
-      "line": 14,
-      "column": 19,
-      "stop_line": 14,
-      "stop_column": 37,
-      "path": "tests/integration/api/test_xai.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "concise_description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "severity": "error"
-    },
-    {
-      "line": 69,
-      "column": 22,
-      "stop_line": 69,
-      "stop_column": 46,
-      "path": "tests/integration/api/test_xai.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
-      "severity": "error"
-    },
-    {
-      "line": 71,
-      "column": 20,
-      "stop_line": 71,
-      "stop_column": 47,
-      "path": "tests/integration/api/test_xai.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 71,
-      "column": 20,
-      "stop_line": 71,
-      "stop_column": 86,
-      "path": "tests/integration/api/test_xai.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor | bool | Unknown` is not assignable to parameter `iterable` with type `Iterable[object]` in function `all`\n  Protocol `Iterable` requires attribute `__iter__`",
-      "concise_description": "Argument `Tensor | bool | Unknown` is not assignable to parameter `iterable` with type `Iterable[object]` in function `all`",
-      "severity": "error"
-    },
-    {
-      "line": 71,
-      "column": 51,
-      "stop_line": 71,
-      "stop_column": 86,
-      "path": "tests/integration/api/test_xai.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 72,
-      "column": 20,
-      "stop_line": 72,
-      "stop_column": 47,
-      "path": "tests/integration/api/test_xai.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 72,
-      "column": 20,
-      "stop_line": 72,
-      "stop_column": 86,
-      "path": "tests/integration/api/test_xai.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor | bool | Unknown` is not assignable to parameter `iterable` with type `Iterable[object]` in function `all`\n  Protocol `Iterable` requires attribute `__iter__`",
-      "concise_description": "Argument `Tensor | bool | Unknown` is not assignable to parameter `iterable` with type `Iterable[object]` in function `all`",
-      "severity": "error"
-    },
-    {
-      "line": 72,
-      "column": 51,
-      "stop_line": 72,
-      "stop_column": 86,
-      "path": "tests/integration/api/test_xai.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 182,
-      "column": 29,
-      "stop_line": 182,
-      "stop_column": 59,
-      "path": "tests/integration/api/test_xai.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `astype`\n  Did you mean `dtype`?",
-      "concise_description": "Object of class `Tensor` has no attribute `astype`",
-      "severity": "error"
-    },
-    {
-      "line": 182,
-      "column": 74,
-      "stop_line": 182,
-      "stop_column": 101,
-      "path": "tests/integration/api/test_xai.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `astype`\n  Did you mean `dtype`?",
-      "concise_description": "Object of class `Tensor` has no attribute `astype`",
-      "severity": "error"
-    },
-    {
-      "line": 137,
-      "column": 5,
-      "stop_line": 137,
-      "stop_column": 21,
-      "path": "tests/integration/conftest.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "No attribute `TASK_LIST` in module `pytest`",
-      "concise_description": "No attribute `TASK_LIST` in module `pytest`",
-      "severity": "error"
-    },
-    {
-      "line": 138,
-      "column": 5,
-      "stop_line": 138,
-      "stop_column": 23,
-      "path": "tests/integration/conftest.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "concise_description": "No attribute `RECIPE_LIST` in module `pytest`",
-      "severity": "error"
-    },
-    {
-      "line": 139,
-      "column": 5,
-      "stop_line": 139,
-      "stop_column": 26,
-      "path": "tests/integration/conftest.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "No attribute `RECIPE_OV_LIST` in module `pytest`",
-      "concise_description": "No attribute `RECIPE_OV_LIST` in module `pytest`",
-      "severity": "error"
-    },
-    {
-      "line": 140,
-      "column": 5,
-      "stop_line": 140,
-      "stop_column": 28,
-      "path": "tests/integration/conftest.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "No attribute `TILE_RECIPE_LIST` in module `pytest`",
-      "concise_description": "No attribute `TILE_RECIPE_LIST` in module `pytest`",
-      "severity": "error"
-    },
-    {
-      "line": 210,
-      "column": 23,
-      "stop_line": 210,
-      "stop_column": 33,
-      "path": "tests/perf_v2/benchmark.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Path | str | None` is not assignable to parameter `model` with type `OVModel | Path | PathLike[Unknown] | str` in function `otx.backend.openvino.engine.OVEngine.__init__`",
-      "concise_description": "Argument `Path | str | None` is not assignable to parameter `model` with type `OVModel | Path | PathLike[Unknown] | str` in function `otx.backend.openvino.engine.OVEngine.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 479,
-      "column": 25,
-      "stop_line": 479,
-      "stop_column": 47,
-      "path": "tests/perf_v2/benchmark.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `rglob`",
-      "concise_description": "Object of class `NoneType` has no attribute `rglob`",
-      "severity": "error"
-    },
-    {
-      "line": 601,
-      "column": 28,
-      "stop_line": 601,
-      "stop_column": 59,
-      "path": "tests/perf_v2/benchmark.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `_LocIndexerFrame[DataFrame]`\n  No matching overload found for function `pandas.core.frame._LocIndexerFrame.__getitem__` called with arguments: (Hashable)\n  Possible overloads:\n  (idx: tuple[((DataFrame) -> ScalarT) | Timestamp | int | str | str_ | tuple[Scalar, ...], int | str | str_ | tuple[Scalar, ...]]) -> Scalar [closest match]\n  (idx: ((DataFrame) -> ScalarT) | tuple[((DataFrame) -> Index[Any] | Series[builtins.bool] | Series[int] | list[builtins.bool] | list[int] | list[HashableT] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | ndarray[tuple[int, ...], dtype[integer[Any]]] | slice[Any, Any, Any] | ScalarT) | Index[Any] | SequenceNotStr[Timestamp | float | str] | Series[builtins.bool] | Series[int] | list[builtins.bool] | list[int] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | ndarray[tuple[int, ...], dtype[integer[Any]]] | slice[Any, Any, Any] | tuple[IndexOpsMixin[Any, Any] | Sequence[Scalar] | Series[builtins.bool] | Timedelta | Timestamp | builtins.bool | bytes | complex | complexfloating[Any, Any] | date | datetime | datetime64[date | int | None] | float | floating[Any] | int | integer[Any] | list[builtins.bool] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | slice[Any, Any, Any] | str | timedelta | timedelta64[int | timedelta | None], ...], ScalarT | None] | None) -> Series[Any]\n  (idx: Scalar) -> DataFrame | Series[Any]\n  (idx: tuple[Scalar, slice[Any, Any, Any]]) -> DataFrame | Series[Any]\n  (key: ((DataFrame) -> Index[Any] | Sequence[Hashable] | Series[builtins.bool] | Series[int] | list[builtins.bool] | list[int] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | ndarray[tuple[int, ...], dtype[integer[Any]]] | slice[Any, Any, Any]) | Index[Any] | Series[builtins.bool] | Series[int] | list[builtins.bool] | list[int] | list[HashableT] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | ndarray[tuple[int, ...], dtype[integer[Any]]] | 
slice[Any, Any, Any] | tuple[((...) -> Unknown) | Index[Any] | Series[builtins.bool] | Series[int] | list[builtins.bool] | list[int] | list[HashableT] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | ndarray[tuple[int, ...], dtype[integer[Any]]] | slice[Any, Any, Any] | tuple[IndexOpsMixin[Any, Any] | Sequence[Scalar] | Series[builtins.bool] | Timedelta | Timestamp | builtins.bool | bytes | complex | complexfloating[Any, Any] | date | datetime | datetime64[date | int | None] | float | floating[Any] | int | integer[Any] | list[builtins.bool] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | slice[Any, Any, Any] | str | timedelta | timedelta64[int | timedelta | None], ...], ((...) -> Unknown) | Index[Any] | Iterable[HashableT] | Series[builtins.bool] | Series[int] | list[builtins.bool] | list[int] | ndarray[tuple[int, ...], dtype[numpy.bool[builtins.bool]]] | ndarray[tuple[int, ...], dtype[integer[Any]]] | slice[Any, Any, Any]]) -> DataFrame",
-      "concise_description": "Cannot index into `_LocIndexerFrame[DataFrame]`",
-      "severity": "error"
-    },
-    {
-      "line": 643,
-      "column": 13,
-      "stop_line": 643,
-      "stop_column": 120,
-      "path": "tests/perf_v2/benchmark.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `bool | str` is not assignable to parameter `deterministic` with type `bool` in function `Benchmark.__init__`",
-      "concise_description": "Argument `bool | str` is not assignable to parameter `deterministic` with type `bool` in function `Benchmark.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 659,
-      "column": 16,
-      "stop_line": 659,
-      "stop_column": 22,
-      "path": "tests/perf_v2/benchmark.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DataFrame | None` is not assignable to parameter `result` with type `DataFrame` in function `Benchmark.check`",
-      "concise_description": "Argument `DataFrame | None` is not assignable to parameter `result` with type `DataFrame` in function `Benchmark.check`",
-      "severity": "error"
-    },
-    {
-      "line": 134,
-      "column": 34,
-      "stop_line": 134,
-      "stop_column": 83,
-      "path": "tests/perf_v2/summary.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `pandas.core.groupby.generic.DataFrameGroupBy.aggregate` called with arguments: (dict[str, list[str]])\n  Possible overloads:\n  (func: Literal['size']) -> Series[Any] [closest match]\n  (func: ((...) -> Unknown) | Mapping[Unknown, ((...) -> Unknown) | list[AggFuncTypeBase] | str | ufunc] | list[AggFuncTypeBase] | str | ufunc | None = ..., *args: Any, *, engine: WindowingEngine = ..., engine_kwargs: WindowingEngineKwargs = ..., **kwargs: Any) -> DataFrame\n  (func: ((...) -> Unknown) | Mapping[Unknown, ((...) -> Unknown) | list[AggFuncTypeBase] | str | ufunc] | list[AggFuncTypeBase] | str | ufunc | None = None, /, **kwargs: Any) -> DataFrame",
-      "concise_description": "No matching overload found for function `pandas.core.groupby.generic.DataFrameGroupBy.aggregate` called with arguments: (dict[str, list[str]])",
-      "severity": "error"
-    },
-    {
-      "line": 274,
-      "column": 34,
-      "stop_line": 274,
-      "stop_column": 83,
-      "path": "tests/perf_v2/summary.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `pandas.core.groupby.generic.DataFrameGroupBy.aggregate` called with arguments: (dict[str, list[str]])\n  Possible overloads:\n  (func: Literal['size']) -> Series[Any] [closest match]\n  (func: ((...) -> Unknown) | Mapping[Unknown, ((...) -> Unknown) | list[AggFuncTypeBase] | str | ufunc] | list[AggFuncTypeBase] | str | ufunc | None = ..., *args: Any, *, engine: WindowingEngine = ..., engine_kwargs: WindowingEngineKwargs = ..., **kwargs: Any) -> DataFrame\n  (func: ((...) -> Unknown) | Mapping[Unknown, ((...) -> Unknown) | list[AggFuncTypeBase] | str | ufunc] | list[AggFuncTypeBase] | str | ufunc | None = None, /, **kwargs: Any) -> DataFrame",
-      "concise_description": "No matching overload found for function `pandas.core.groupby.generic.DataFrameGroupBy.aggregate` called with arguments: (dict[str, list[str]])",
-      "severity": "error"
-    },
-    {
-      "line": 53,
-      "column": 33,
-      "stop_line": 53,
-      "stop_column": 39,
-      "path": "tests/regression/test_regression.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `TempdirFactory` is not assignable to parameter `*args` with type `PathLike[str] | str` in function `pathlib.Path.__new__`",
-      "concise_description": "Argument `TempdirFactory` is not assignable to parameter `*args` with type `PathLike[str] | str` in function `pathlib.Path.__new__`",
-      "severity": "error"
-    },
-    {
-      "line": 85,
-      "column": 9,
-      "stop_line": 85,
-      "stop_column": 20,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "bad-override",
-      "description": "Class member `CommonSemanticSegmentationExporter._apply_impl` overrides parent class `Exporter` in an inconsistent manner\n  `CommonSemanticSegmentationExporter._apply_impl` has type `BoundMethod[CommonSemanticSegmentationExporter, (self: CommonSemanticSegmentationExporter) -> None]`, which is not assignable to `BoundMethod[CommonSemanticSegmentationExporter, (self: CommonSemanticSegmentationExporter) -> Never]`, the type of `Exporter._apply_impl`",
-      "concise_description": "Class member `CommonSemanticSegmentationExporter._apply_impl` overrides parent class `Exporter` in an inconsistent manner",
-      "severity": "error"
-    },
-    {
-      "line": 100,
-      "column": 48,
-      "stop_line": 100,
-      "stop_column": 80,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "not-iterable",
-      "description": "Type `Categories` is not iterable",
-      "concise_description": "Type `Categories` is not iterable",
-      "severity": "error"
-    },
-    {
-      "line": 125,
-      "column": 52,
-      "stop_line": 125,
-      "stop_column": 60,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Path` is not assignable to parameter `save_dir` with type `str` in function `CommonSemanticSegmentationExporter._export_item_annotation`",
-      "concise_description": "Argument `Path` is not assignable to parameter `save_dir` with type `str` in function `CommonSemanticSegmentationExporter._export_item_annotation`",
-      "severity": "error"
-    },
-    {
-      "line": 137,
-      "column": 28,
-      "stop_line": 137,
-      "stop_column": 43,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `data`",
-      "concise_description": "Object of class `NoneType` has no attribute `data`",
-      "severity": "error"
-    },
-    {
-      "line": 144,
-      "column": 45,
-      "stop_line": 144,
-      "stop_column": 67,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[ndarray[tuple[int, ...], dtype[Any]]]` is not assignable to parameter `polygons` with type `ndarray[Unknown, Unknown]` in function `otx.data.utils.structures.mask.mask_util.polygon_to_bitmap`",
-      "concise_description": "Argument `list[ndarray[tuple[int, ...], dtype[Any]]]` is not assignable to parameter `polygons` with type `ndarray[Unknown, Unknown]` in function `otx.data.utils.structures.mask.mask_util.polygon_to_bitmap`",
-      "severity": "error"
-    },
-    {
-      "line": 144,
-      "column": 55,
-      "stop_line": 144,
-      "stop_column": 65,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Annotation` has no attribute `points`",
-      "concise_description": "Object of class `Annotation` has no attribute `points`",
-      "severity": "error"
-    },
-    {
-      "line": 145,
-      "column": 38,
-      "stop_line": 145,
-      "stop_column": 47,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Annotation` has no attribute `label`",
-      "concise_description": "Object of class `Annotation` has no attribute `label`",
-      "severity": "error"
-    },
-    {
-      "line": 147,
-      "column": 45,
-      "stop_line": 147,
-      "stop_column": 91,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[ndarray[tuple[int, ...], dtype[Any]]]` is not assignable to parameter `polygons` with type `ndarray[Unknown, Unknown]` in function `otx.data.utils.structures.mask.mask_util.polygon_to_bitmap`",
-      "concise_description": "Argument `list[ndarray[tuple[int, ...], dtype[Any]]]` is not assignable to parameter `polygons` with type `ndarray[Unknown, Unknown]` in function `otx.data.utils.structures.mask.mask_util.polygon_to_bitmap`",
-      "severity": "error"
-    },
-    {
-      "line": 147,
-      "column": 63,
-      "stop_line": 147,
-      "stop_column": 77,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Annotation` has no attribute `as_polygon`",
-      "concise_description": "Object of class `Annotation` has no attribute `as_polygon`",
-      "severity": "error"
-    },
-    {
-      "line": 148,
-      "column": 38,
-      "stop_line": 148,
-      "stop_column": 47,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Annotation` has no attribute `label`",
-      "concise_description": "Object of class `Annotation` has no attribute `label`",
-      "severity": "error"
-    },
-    {
-      "line": 150,
-      "column": 49,
-      "stop_line": 150,
-      "stop_column": 61,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Annotation` has no attribute `get_bbox`",
-      "concise_description": "Object of class `Annotation` has no attribute `get_bbox`",
-      "severity": "error"
-    },
-    {
-      "line": 151,
-      "column": 55,
-      "stop_line": 151,
-      "stop_column": 64,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Annotation` has no attribute `label`",
-      "concise_description": "Object of class `Annotation` has no attribute `label`",
-      "severity": "error"
-    },
-    {
-      "line": 158,
-      "column": 15,
-      "stop_line": 158,
-      "stop_column": 44,
-      "path": "tests/test_helpers.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`/` is not supported between `str` and `str`\n  Cannot find `__truediv__` or `__rtruediv__`",
-      "concise_description": "`/` is not supported between `str` and `str`",
-      "severity": "error"
-    },
-    {
-      "line": 23,
-      "column": 17,
-      "stop_line": 23,
-      "stop_column": 28,
-      "path": "tests/unit/backend/native/callbacks/test_gpu_mem_monitor.py",
-      "code": -2,
-      "name": "bad-override",
-      "description": "Class member `DebugLogger.log_metrics` overrides parent class `CSVLogger` in an inconsistent manner\n  `DebugLogger.log_metrics` has type `BoundMethod[TestGPUMemMonitor.test_gpu_monitor.DebugLogger, (self: TestGPUMemMonitor.test_gpu_monitor.DebugLogger, metrics: dict[str, float], step: int | None = None) -> None]`, which is not assignable to `BoundMethod[TestGPUMemMonitor.test_gpu_monitor.DebugLogger, (self: TestGPUMemMonitor.test_gpu_monitor.DebugLogger, metrics: dict[str, Tensor | float], step: int | None = None) -> None]`, the type of `CSVLogger.log_metrics`",
-      "concise_description": "Class member `DebugLogger.log_metrics` overrides parent class `CSVLogger` in an inconsistent manner",
-      "severity": "error"
-    },
-    {
-      "line": 16,
-      "column": 9,
-      "stop_line": 16,
-      "stop_column": 20,
-      "path": "tests/unit/backend/native/exporter/test_base.py",
-      "code": -2,
-      "name": "bad-override",
-      "description": "Class member `MockModelExporter.to_openvino` overrides parent class `OTXModelExporter` in an inconsistent manner\n  `MockModelExporter.to_openvino` has type `BoundMethod[MockModelExporter, (self: MockModelExporter, model: Unknown, output_dir: Unknown, base_model_name: Unknown, precision: Unknown) -> Unknown]`, which is not assignable to `BoundMethod[MockModelExporter, (self: MockModelExporter, model: OTXModel, output_dir: Path, base_model_name: str = 'exported_model', precision: OTXPrecisionType = OTXPrecisionType.FP32) -> Path]`, the type of `OTXModelExporter.to_openvino`",
-      "concise_description": "Class member `MockModelExporter.to_openvino` overrides parent class `OTXModelExporter` in an inconsistent manner",
-      "severity": "error"
-    },
-    {
-      "line": 19,
-      "column": 9,
-      "stop_line": 19,
-      "stop_column": 16,
-      "path": "tests/unit/backend/native/exporter/test_base.py",
-      "code": -2,
-      "name": "bad-override",
-      "description": "Class member `MockModelExporter.to_onnx` overrides parent class `OTXModelExporter` in an inconsistent manner\n  `MockModelExporter.to_onnx` has type `BoundMethod[MockModelExporter, (self: MockModelExporter, model: Unknown, output_dir: Unknown, base_model_name: Unknown, precision: Unknown) -> Unknown]`, which is not assignable to `BoundMethod[MockModelExporter, (self: MockModelExporter, model: OTXModel, output_dir: Path, base_model_name: str = 'exported_model', precision: OTXPrecisionType = OTXPrecisionType.FP32, embed_metadata: bool = True) -> Path]`, the type of `OTXModelExporter.to_onnx`",
-      "concise_description": "Class member `MockModelExporter.to_onnx` overrides parent class `OTXModelExporter` in an inconsistent manner",
-      "severity": "error"
-    },
-    {
-      "line": 31,
-      "column": 62,
-      "stop_line": 31,
-      "stop_column": 67,
-      "path": "tests/unit/backend/native/lightning/strategies/test_strategies.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Literal['xpu']` is not assignable to parameter `accelerator` with type `Accelerator | None` in function `otx.backend.native.lightning.strategies.xpu_single.SingleXPUStrategy.__init__`",
-      "concise_description": "Argument `Literal['xpu']` is not assignable to parameter `accelerator` with type `Accelerator | None` in function `otx.backend.native.lightning.strategies.xpu_single.SingleXPUStrategy.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 27,
-      "column": 61,
-      "stop_line": 27,
-      "stop_column": 65,
-      "path": "tests/unit/backend/native/models/classification/backbones/test_otx_efficientnet.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `None` is not assignable to parameter `pretrained` with type `bool` in function `otx.backend.native.models.classification.backbones.efficientnet.EfficientNetBackbone.__new__`",
-      "concise_description": "Argument `None` is not assignable to parameter `pretrained` with type `bool` in function `otx.backend.native.models.classification.backbones.efficientnet.EfficientNetBackbone.__new__`",
-      "severity": "error"
-    },
-    {
-      "line": 33,
-      "column": 91,
-      "stop_line": 33,
-      "stop_column": 95,
-      "path": "tests/unit/backend/native/models/classification/backbones/test_otx_efficientnet.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `None` is not assignable to parameter `pretrained` with type `bool` in function `otx.backend.native.models.classification.backbones.efficientnet.EfficientNetBackbone.__new__`",
-      "concise_description": "Argument `None` is not assignable to parameter `pretrained` with type `bool` in function `otx.backend.native.models.classification.backbones.efficientnet.EfficientNetBackbone.__new__`",
-      "severity": "error"
-    },
-    {
-      "line": 24,
-      "column": 23,
-      "stop_line": 24,
-      "stop_column": 53,
-      "path": "tests/unit/backend/native/models/classification/backbones/test_timm.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str | None` is not assignable to parameter `*args` with type `PathLike[str] | str` in function `pathlib.Path.__new__`",
-      "concise_description": "Argument `str | None` is not assignable to parameter `*args` with type `PathLike[str] | str` in function `pathlib.Path.__new__`",
-      "severity": "error"
-    },
-    {
-      "line": 71,
-      "column": 59,
-      "stop_line": 71,
-      "stop_column": 65,
-      "path": "tests/unit/backend/native/models/classification/backbones/test_vision_transformer.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `peft` with type `Literal['dora', 'lora'] | None` in function `otx.backend.native.models.classification.backbones.vision_transformer.VisionTransformerBackbone.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `peft` with type `Literal['dora', 'lora'] | None` in function `otx.backend.native.models.classification.backbones.vision_transformer.VisionTransformerBackbone.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 48,
-      "column": 20,
-      "stop_line": 52,
-      "stop_column": 10,
-      "path": "tests/unit/backend/native/models/classification/conftest.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
-      "concise_description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 109,
-      "column": 20,
-      "stop_line": 116,
-      "stop_column": 10,
-      "path": "tests/unit/backend/native/models/classification/conftest.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
-      "concise_description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 179,
-      "column": 20,
-      "stop_line": 183,
-      "stop_column": 10,
-      "path": "tests/unit/backend/native/models/classification/conftest.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
-      "concise_description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 217,
-      "column": 33,
-      "stop_line": 217,
-      "stop_column": 53,
-      "path": "tests/unit/backend/native/models/classification/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)",
-      "severity": "error"
-    },
-    {
-      "line": 218,
-      "column": 27,
-      "stop_line": 218,
-      "stop_column": 82,
-      "path": "tests/unit/backend/native/models/classification/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])",
-      "severity": "error"
-    },
-    {
-      "line": 31,
-      "column": 27,
-      "stop_line": 31,
-      "stop_column": 38,
-      "path": "tests/unit/backend/native/models/classification/heads/test_hlabel_cls_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `tuple[Literal[24], Literal[24], Literal[3]]` is not assignable to parameter `ori_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "concise_description": "Argument `tuple[Literal[24], Literal[24], Literal[3]]` is not assignable to parameter `ori_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "severity": "error"
-    },
-    {
-      "line": 32,
-      "column": 27,
-      "stop_line": 32,
-      "stop_column": 38,
-      "path": "tests/unit/backend/native/models/classification/heads/test_hlabel_cls_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `tuple[Literal[24], Literal[24], Literal[3]]` is not assignable to parameter `img_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "concise_description": "Argument `tuple[Literal[24], Literal[24], Literal[3]]` is not assignable to parameter `img_shape` with type `tuple[int, int]` in function `otx.data.entity.base.ImageInfo.__new__`",
-      "severity": "error"
-    },
-    {
-      "line": 17,
-      "column": 12,
-      "stop_line": 20,
-      "stop_column": 6,
-      "path": "tests/unit/backend/native/models/classification/heads/test_multilabel_cls_head.py",
-      "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `MultiLabelLinearClsHead` is not assignable to declared return type `None`",
-      "concise_description": "Returned type `MultiLabelLinearClsHead` is not assignable to declared return type `None`",
-      "severity": "error"
-    },
-    {
-      "line": 25,
-      "column": 12,
-      "stop_line": 30,
-      "stop_column": 6,
-      "path": "tests/unit/backend/native/models/classification/heads/test_multilabel_cls_head.py",
-      "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `MultiLabelNonLinearClsHead` is not assignable to declared return type `None`",
-      "concise_description": "Returned type `MultiLabelNonLinearClsHead` is not assignable to declared return type `None`",
-      "severity": "error"
-    },
-    {
-      "line": 38,
-      "column": 22,
-      "stop_line": 44,
-      "stop_column": 14,
-      "path": "tests/unit/backend/native/models/classification/heads/test_multilabel_cls_head.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, scale_factor=tuple[float, float], ori_shape=tuple[Literal[281], Literal[500]], img_shape=tuple[Literal[224], Literal[224]], ignored_labels=list[@_])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, scale_factor=tuple[float, float], ori_shape=tuple[Literal[281], Literal[500]], img_shape=tuple[Literal[224], Literal[224]], ignored_labels=list[@_])",
-      "severity": "error"
-    },
-    {
-      "line": 55,
-      "column": 22,
-      "stop_line": 61,
-      "stop_column": 14,
-      "path": "tests/unit/backend/native/models/classification/heads/test_multilabel_cls_head.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, scale_factor=tuple[float, float], ori_shape=tuple[Literal[281], Literal[500]], img_shape=tuple[Literal[224], Literal[224]], ignored_labels=list[int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, scale_factor=tuple[float, float], ori_shape=tuple[Literal[281], Literal[500]], img_shape=tuple[Literal[224], Literal[224]], ignored_labels=list[int])",
-      "severity": "error"
-    },
-    {
-      "line": 61,
-      "column": 16,
-      "stop_line": 61,
-      "stop_column": 44,
-      "path": "tests/unit/backend/native/models/classification/test_efficientnet.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `in_size`",
-      "concise_description": "Object of class `Tensor` has no attribute `in_size`",
-      "severity": "error"
-    },
-    {
-      "line": 131,
-      "column": 16,
-      "stop_line": 131,
-      "stop_column": 44,
-      "path": "tests/unit/backend/native/models/classification/test_efficientnet.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `in_size`",
-      "concise_description": "Object of class `Tensor` has no attribute `in_size`",
-      "severity": "error"
-    },
-    {
-      "line": 201,
-      "column": 16,
-      "stop_line": 201,
-      "stop_column": 44,
-      "path": "tests/unit/backend/native/models/classification/test_efficientnet.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `in_size`",
-      "concise_description": "Object of class `Tensor` has no attribute `in_size`",
-      "severity": "error"
-    },
-    {
-      "line": 61,
-      "column": 16,
-      "stop_line": 61,
-      "stop_column": 44,
-      "path": "tests/unit/backend/native/models/classification/test_mobilenet_v3.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `in_size`",
-      "concise_description": "Object of class `Tensor` has no attribute `in_size`",
-      "severity": "error"
-    },
-    {
-      "line": 131,
-      "column": 16,
-      "stop_line": 131,
-      "stop_column": 44,
-      "path": "tests/unit/backend/native/models/classification/test_mobilenet_v3.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `in_size`",
-      "concise_description": "Object of class `Tensor` has no attribute `in_size`",
-      "severity": "error"
-    },
-    {
-      "line": 201,
-      "column": 16,
-      "stop_line": 201,
-      "stop_column": 44,
-      "path": "tests/unit/backend/native/models/classification/test_mobilenet_v3.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `in_size`",
-      "concise_description": "Object of class `Tensor` has no attribute `in_size`",
-      "severity": "error"
-    },
-    {
-      "line": 111,
-      "column": 20,
-      "stop_line": 111,
-      "stop_column": 45,
-      "path": "tests/unit/backend/native/models/classification/test_torchvision_model.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 112,
-      "column": 20,
-      "stop_line": 112,
-      "stop_column": 43,
-      "path": "tests/unit/backend/native/models/classification/test_torchvision_model.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 113,
-      "column": 20,
-      "stop_line": 113,
-      "stop_column": 43,
-      "path": "tests/unit/backend/native/models/classification/test_torchvision_model.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 13,
-      "column": 34,
-      "stop_line": 13,
-      "stop_column": 47,
-      "path": "tests/unit/backend/native/models/classification/utils/test_attention_with_peft.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Linear` is not assignable to parameter `qkv` with type `Attention` in function `otx.backend.native.models.classification.utils.peft.AttentionWithLoRA.__init__`",
-      "concise_description": "Argument `Linear` is not assignable to parameter `qkv` with type `Attention` in function `otx.backend.native.models.classification.utils.peft.AttentionWithLoRA.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 21,
-      "column": 34,
-      "stop_line": 21,
-      "stop_column": 47,
-      "path": "tests/unit/backend/native/models/classification/utils/test_attention_with_peft.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Linear` is not assignable to parameter `qkv` with type `Attention` in function `otx.backend.native.models.classification.utils.peft.AttentionWithDoRA.__init__`",
-      "concise_description": "Argument `Linear` is not assignable to parameter `qkv` with type `Attention` in function `otx.backend.native.models.classification.utils.peft.AttentionWithDoRA.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 12,
-      "column": 18,
-      "stop_line": 12,
-      "stop_column": 92,
-      "path": "tests/unit/backend/native/models/classification/utils/test_ignored_labels.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ignored_labels=list[int], img_idx=Literal[0], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ignored_labels=list[int], img_idx=Literal[0], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])",
-      "severity": "error"
-    },
-    {
-      "line": 13,
-      "column": 18,
-      "stop_line": 13,
-      "stop_column": 95,
-      "path": "tests/unit/backend/native/models/classification/utils/test_ignored_labels.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ignored_labels=list[int], img_idx=Literal[1], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ignored_labels=list[int], img_idx=Literal[1], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])",
-      "severity": "error"
-    },
-    {
-      "line": 14,
-      "column": 18,
-      "stop_line": 14,
-      "stop_column": 89,
-      "path": "tests/unit/backend/native/models/classification/utils/test_ignored_labels.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ignored_labels=list[int], img_idx=Literal[2], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (ignored_labels=list[int], img_idx=Literal[2], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])",
-      "severity": "error"
-    },
-    {
-      "line": 53,
-      "column": 33,
-      "stop_line": 53,
-      "stop_column": 36,
-      "path": "tests/unit/backend/native/models/common/backbones/test_pytorchcv_backbones.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `dict[str, str]` is not assignable to parameter `normalization` with type `(...) -> Module` in function `otx.backend.native.models.common.backbones.pytorchcv_backbones.replace_norm`",
-      "concise_description": "Argument `dict[str, str]` is not assignable to parameter `normalization` with type `(...) -> Module` in function `otx.backend.native.models.common.backbones.pytorchcv_backbones.replace_norm`",
-      "severity": "error"
-    },
-    {
-      "line": 21,
-      "column": 24,
-      "stop_line": 21,
-      "stop_column": 43,
-      "path": "tests/unit/backend/native/models/common/test_iou2d_calculator.py",
-      "code": -2,
-      "name": "bad-assignment",
-      "description": "`signedinteger[_64Bit]` is not assignable to variable `num_bbox` with type `int | None`",
-      "concise_description": "`signedinteger[_64Bit]` is not assignable to variable `num_bbox` with type `int | None`",
-      "severity": "error"
-    },
-    {
-      "line": 22,
-      "column": 26,
-      "stop_line": 22,
-      "stop_column": 41,
-      "path": "tests/unit/backend/native/models/common/test_iou2d_calculator.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C._VariableFunctions.rand` called with arguments: (tuple[int | None, Literal[2]])\n  Possible overloads:\n  (size: Sequence[SymInt | int], *, generator: Generator | None, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (*size: SymInt | int, *, generator: Generator | None, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (size: Sequence[SymInt | int], *, generator: Generator | None, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (*size: SymInt | int, *, generator: Generator | None, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (size: Sequence[SymInt | int], *, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor [closest match]\n  (*size: SymInt | int, *, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (size: Sequence[SymInt | int], *, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None 
= False, requires_grad: bool | None = False) -> Tensor\n  (*size: SymInt | int, *, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor",
-      "concise_description": "No matching overload found for function `torch._C._VariableFunctions.rand` called with arguments: (tuple[int | None, Literal[2]])",
-      "severity": "error"
-    },
-    {
-      "line": 23,
-      "column": 36,
-      "stop_line": 23,
-      "stop_column": 51,
-      "path": "tests/unit/backend/native/models/common/test_iou2d_calculator.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C._VariableFunctions.rand` called with arguments: (tuple[int | None, Literal[2]])\n  Possible overloads:\n  (size: Sequence[SymInt | int], *, generator: Generator | None, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (*size: SymInt | int, *, generator: Generator | None, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (size: Sequence[SymInt | int], *, generator: Generator | None, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (*size: SymInt | int, *, generator: Generator | None, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (size: Sequence[SymInt | int], *, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor [closest match]\n  (*size: SymInt | int, *, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (size: Sequence[SymInt | int], *, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None 
= False, requires_grad: bool | None = False) -> Tensor\n  (*size: SymInt | int, *, names: Sequence[EllipsisType | str | None] | None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor",
-      "concise_description": "No matching overload found for function `torch._C._VariableFunctions.rand` called with arguments: (tuple[int | None, Literal[2]])",
-      "severity": "error"
-    },
-    {
-      "line": 27,
-      "column": 16,
-      "stop_line": 27,
-      "stop_column": 32,
-      "path": "tests/unit/backend/native/models/common/test_iou2d_calculator.py",
-      "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `tuple[Tensor, int | None]` is not assignable to declared return type `tuple[Tensor, int]`",
-      "concise_description": "Returned type `tuple[Tensor, int | None]` is not assignable to declared return type `tuple[Tensor, int]`",
-      "severity": "error"
-    },
-    {
-      "line": 35,
-      "column": 35,
-      "stop_line": 35,
-      "stop_column": 51,
-      "path": "tests/unit/backend/native/models/detection/backbones/test_presnet.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `(normalization: ((...) -> Module) | Module | tuple[str, Module] | None, num_features: int, postfix: int | str = '', layer_name: str | None = None, requires_grad: bool = True, eps: float = ..., **kwargs: Unknown) -> tuple[str, Module]` is not assignable to parameter `func` with type `(...) -> Module` in function `functools.partial.__new__`",
-      "concise_description": "Argument `(normalization: ((...) -> Module) | Module | tuple[str, Module] | None, num_features: int, postfix: int | str = '', layer_name: str | None = None, requires_grad: bool = True, eps: float = ..., **kwargs: Unknown) -> tuple[str, Module]` is not assignable to parameter `func` with type `(...) -> Module` in function `functools.partial.__new__`",
-      "severity": "error"
-    },
-    {
-      "line": 467,
-      "column": 9,
-      "stop_line": 467,
-      "stop_column": 22,
-      "path": "tests/unit/backend/native/models/detection/backbones/test_vit_tiny.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `int` has no attribute `backward`",
-      "concise_description": "Object of class `int` has no attribute `backward`",
-      "severity": "error"
-    },
-    {
-      "line": 479,
-      "column": 9,
-      "stop_line": 479,
-      "stop_column": 22,
-      "path": "tests/unit/backend/native/models/detection/backbones/test_vit_tiny.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `int` has no attribute `backward`",
-      "concise_description": "Object of class `int` has no attribute `backward`",
-      "severity": "error"
-    },
-    {
-      "line": 20,
-      "column": 33,
-      "stop_line": 24,
-      "stop_column": 10,
-      "path": "tests/unit/backend/native/models/detection/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])",
-      "severity": "error"
-    },
-    {
-      "line": 25,
-      "column": 33,
-      "stop_line": 29,
-      "stop_column": 10,
-      "path": "tests/unit/backend/native/models/detection/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])",
-      "severity": "error"
-    },
-    {
-      "line": 38,
-      "column": 18,
-      "stop_line": 38,
-      "stop_column": 73,
-      "path": "tests/unit/backend/native/models/detection/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])",
-      "severity": "error"
-    },
-    {
-      "line": 39,
-      "column": 18,
-      "stop_line": 39,
-      "stop_column": 73,
-      "path": "tests/unit/backend/native/models/detection/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[1], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[1], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])",
-      "severity": "error"
-    },
-    {
-      "line": 84,
-      "column": 37,
-      "stop_line": 84,
-      "stop_column": 59,
-      "path": "tests/unit/backend/native/models/detection/detectors/test_detr.py",
-      "code": -2,
-      "name": "implicit-import",
-      "description": "Module `torchvision.tv_tensors` exists, but was not imported explicitly. You are relying on other modules to load it.",
-      "concise_description": "Module `torchvision.tv_tensors` exists, but was not imported explicitly. You are relying on other modules to load it.",
-      "severity": "error"
-    },
-    {
-      "line": 53,
-      "column": 23,
-      "stop_line": 53,
-      "stop_column": 83,
-      "path": "tests/unit/backend/native/models/detection/detectors/test_single_stage_detector.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `ImageInfo` is not assignable to parameter `imgs_info` with type `Sequence[ImageInfo | None] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
-      "concise_description": "Argument `ImageInfo` is not assignable to parameter `imgs_info` with type `Sequence[ImageInfo | None] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 53,
-      "column": 32,
-      "stop_line": 53,
-      "stop_column": 83,
-      "path": "tests/unit/backend/native/models/detection/detectors/test_single_stage_detector.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[32], Literal[32]], ori_shape=tuple[Literal[32], Literal[32]])",
-      "severity": "error"
-    },
-    {
-      "line": 55,
-      "column": 20,
-      "stop_line": 55,
-      "stop_column": 58,
-      "path": "tests/unit/backend/native/models/detection/detectors/test_single_stage_detector.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXPredictionBatch.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 28,
-      "column": 24,
-      "stop_line": 31,
-      "stop_column": 14,
-      "path": "tests/unit/backend/native/models/detection/heads/test_class_incremental_mixin.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DeltaXYWHBBoxCoder` is not assignable to parameter `bbox_coder` with type `Module` in function `otx.backend.native.models.detection.losses.atss_loss.ATSSCriterion.__init__`",
-      "concise_description": "Argument `DeltaXYWHBBoxCoder` is not assignable to parameter `bbox_coder` with type `Module` in function `otx.backend.native.models.detection.losses.atss_loss.ATSSCriterion.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 41,
-      "column": 30,
-      "stop_line": 51,
-      "stop_column": 14,
-      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `SSDAnchorGeneratorClustered` is not assignable to parameter `anchor_generator` with type `Module` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
-      "concise_description": "Argument `SSDAnchorGeneratorClustered` is not assignable to parameter `anchor_generator` with type `Module` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 52,
-      "column": 24,
-      "stop_line": 55,
-      "stop_column": 14,
-      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DeltaXYWHBBoxCoder` is not assignable to parameter `bbox_coder` with type `Module` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
-      "concise_description": "Argument `DeltaXYWHBBoxCoder` is not assignable to parameter `bbox_coder` with type `Module` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 60,
-      "column": 23,
-      "stop_line": 60,
-      "stop_column": 32,
-      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DictConfig` is not assignable to parameter `train_cfg` with type `dict[Unknown, Unknown]` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
-      "concise_description": "Argument `DictConfig` is not assignable to parameter `train_cfg` with type `dict[Unknown, Unknown]` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 61,
-      "column": 22,
-      "stop_line": 61,
-      "stop_column": 30,
-      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DictConfig` is not assignable to parameter `test_cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
-      "concise_description": "Argument `DictConfig` is not assignable to parameter `test_cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.ssd_head.SSDHeadModule.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 71,
-      "column": 16,
-      "stop_line": 71,
-      "stop_column": 48,
-      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `BasePriorGenerator` has no attribute `widths`",
-      "concise_description": "Object of class `BasePriorGenerator` has no attribute `widths`",
-      "severity": "error"
-    },
-    {
-      "line": 75,
-      "column": 16,
-      "stop_line": 75,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `BasePriorGenerator` has no attribute `heights`",
-      "concise_description": "Object of class `BasePriorGenerator` has no attribute `heights`",
-      "severity": "error"
-    },
-    {
-      "line": 80,
-      "column": 16,
-      "stop_line": 80,
-      "stop_column": 42,
-      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `BaseBBoxCoder` has no attribute `means`",
-      "concise_description": "Object of class `BaseBBoxCoder` has no attribute `means`",
-      "severity": "error"
-    },
-    {
-      "line": 81,
-      "column": 16,
-      "stop_line": 81,
-      "stop_column": 41,
-      "path": "tests/unit/backend/native/models/detection/heads/test_ssd_head.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `BaseBBoxCoder` has no attribute `stds`",
-      "concise_description": "Object of class `BaseBBoxCoder` has no attribute `stds`",
-      "severity": "error"
-    },
-    {
-      "line": 28,
-      "column": 109,
-      "stop_line": 28,
-      "stop_column": 117,
-      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DictConfig` is not assignable to parameter `test_cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.__init__`",
-      "concise_description": "Argument `DictConfig` is not assignable to parameter `test_cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 30,
-      "column": 61,
-      "stop_line": 30,
-      "stop_column": 65,
-      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.forward`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.forward`",
-      "severity": "error"
-    },
-    {
-      "line": 31,
-      "column": 83,
-      "stop_line": 31,
-      "stop_column": 91,
-      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DictConfig` is not assignable to parameter `cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.predict_by_feat`",
-      "concise_description": "Argument `DictConfig` is not assignable to parameter `cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.predict_by_feat`",
-      "severity": "error"
-    },
-    {
-      "line": 37,
-      "column": 17,
-      "stop_line": 37,
-      "stop_column": 25,
-      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DictConfig` is not assignable to parameter `cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.predict_by_feat`",
-      "concise_description": "Argument `DictConfig` is not assignable to parameter `cfg` with type `dict[Unknown, Unknown] | None` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.predict_by_feat`",
-      "severity": "error"
-    },
-    {
-      "line": 55,
-      "column": 27,
-      "stop_line": 55,
-      "stop_column": 59,
-      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 66,
-      "column": 47,
-      "stop_line": 66,
-      "stop_column": 51,
-      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
-      "severity": "error"
-    },
-    {
-      "line": 85,
-      "column": 27,
-      "stop_line": 85,
-      "stop_column": 59,
-      "path": "tests/unit/backend/native/models/detection/heads/test_yolox_head.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 39,
-      "column": 16,
-      "stop_line": 88,
-      "stop_column": 10,
-      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
-      "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `dict[str, Tensor | dict[str, Tensor] | list[@_]]` is not assignable to declared return type `dict[str, Tensor]`",
-      "concise_description": "Returned type `dict[str, Tensor | dict[str, Tensor] | list[@_]]` is not assignable to declared return type `dict[str, Tensor]`",
-      "severity": "error"
-    },
-    {
-      "line": 129,
-      "column": 65,
-      "stop_line": 129,
-      "stop_column": 72,
-      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.deim_loss.DEIMCriterion.loss_labels_mal`",
-      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.deim_loss.DEIMCriterion.loss_labels_mal`",
-      "severity": "error"
-    },
-    {
-      "line": 140,
-      "column": 65,
-      "stop_line": 140,
-      "stop_column": 72,
-      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_labels_vfl`",
-      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_labels_vfl`",
-      "severity": "error"
-    },
-    {
-      "line": 151,
-      "column": 60,
-      "stop_line": 151,
-      "stop_column": 67,
-      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_boxes`",
-      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_boxes`",
-      "severity": "error"
-    },
-    {
-      "line": 163,
-      "column": 60,
-      "stop_line": 163,
-      "stop_column": 67,
-      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_local`",
-      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_local`",
-      "severity": "error"
-    },
-    {
-      "line": 215,
-      "column": 64,
-      "stop_line": 215,
-      "stop_column": 71,
-      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.deim_loss.DEIMCriterion.loss_labels_mal`",
-      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.deim_loss.DEIMCriterion.loss_labels_mal`",
-      "severity": "error"
-    },
-    {
-      "line": 218,
-      "column": 64,
-      "stop_line": 218,
-      "stop_column": 71,
-      "path": "tests/unit/backend/native/models/detection/losses/test_deim_loss.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_labels_vfl`",
-      "concise_description": "Argument `list[tuple[Tensor, Tensor]]` is not assignable to parameter `indices` with type `list[tuple[int, int]]` in function `otx.backend.native.models.detection.losses.dfine_loss.DFINECriterion.loss_labels_vfl`",
-      "severity": "error"
-    },
-    {
-      "line": 37,
-      "column": 45,
-      "stop_line": 37,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/losses/test_yolox_loss.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
-      "severity": "error"
-    },
-    {
-      "line": 63,
-      "column": 45,
-      "stop_line": 63,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/losses/test_yolox_loss.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
-      "severity": "error"
-    },
-    {
-      "line": 84,
-      "column": 45,
-      "stop_line": 84,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/losses/test_yolox_loss.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.detection.heads.yolox_head.YOLOXHeadModule.prepare_loss_inputs`",
-      "severity": "error"
-    },
-    {
-      "line": 328,
-      "column": 9,
-      "stop_line": 328,
-      "stop_column": 22,
-      "path": "tests/unit/backend/native/models/detection/necks/test_hybrid_encoder.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `int` has no attribute `backward`",
-      "concise_description": "Object of class `int` has no attribute `backward`",
-      "severity": "error"
-    },
-    {
-      "line": 117,
-      "column": 21,
-      "stop_line": 117,
-      "stop_column": 22,
-      "path": "tests/unit/backend/native/models/detection/test_atss.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
-      "severity": "error"
-    },
-    {
-      "line": 47,
-      "column": 16,
-      "stop_line": 47,
-      "stop_column": 40,
-      "path": "tests/unit/backend/native/models/detection/test_base.py",
-      "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `DictConfig | ListConfig` is not assignable to declared return type `DictConfig`",
-      "concise_description": "Returned type `DictConfig | ListConfig` is not assignable to declared return type `DictConfig`",
-      "severity": "error"
-    },
-    {
-      "line": 32,
-      "column": 24,
-      "stop_line": 32,
-      "stop_column": 34,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 72,
-      "column": 20,
-      "stop_line": 72,
-      "stop_column": 57,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
-      "severity": "error"
-    },
-    {
-      "line": 76,
-      "column": 24,
-      "stop_line": 76,
-      "stop_column": 61,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
-      "severity": "error"
-    },
-    {
-      "line": 78,
-      "column": 24,
-      "stop_line": 78,
-      "stop_column": 61,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
-      "severity": "error"
-    },
-    {
-      "line": 93,
-      "column": 24,
-      "stop_line": 93,
-      "stop_column": 34,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 125,
-      "column": 24,
-      "stop_line": 125,
-      "stop_column": 34,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 149,
-      "column": 24,
-      "stop_line": 149,
-      "stop_column": 34,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 206,
-      "column": 20,
-      "stop_line": 206,
-      "stop_column": 55,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`in` is not supported between `str` and `Module`\n  Expected `__contains__` to be a callable, got `Module | Tensor`",
-      "concise_description": "`in` is not supported between `str` and `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 209,
-      "column": 16,
-      "stop_line": 209,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 209,
-      "column": 38,
-      "stop_line": 209,
-      "stop_column": 48,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Tensor`\n  Argument `Literal['loss_vfl']` is not assignable to parameter `indices` with type `EllipsisType | SupportsIndex | Tensor | _NestedSequence[EllipsisType | Tensor | bool | int | slice[Any, Any, Any] | None] | bool | int | slice[Any, Any, Any] | tuple[_Index, ...] | None` in function `torch._C.TensorBase.__getitem__`",
-      "concise_description": "Cannot index into `Tensor`",
-      "severity": "error"
-    },
-    {
-      "line": 210,
-      "column": 16,
-      "stop_line": 210,
-      "stop_column": 50,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 211,
-      "column": 16,
-      "stop_line": 211,
-      "stop_column": 50,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 212,
-      "column": 16,
-      "stop_line": 212,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 213,
-      "column": 16,
-      "stop_line": 213,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 214,
-      "column": 16,
-      "stop_line": 214,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/test_deim.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 33,
-      "column": 24,
-      "stop_line": 33,
-      "stop_column": 34,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 81,
-      "column": 20,
-      "stop_line": 81,
-      "stop_column": 57,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
-      "severity": "error"
-    },
-    {
-      "line": 98,
-      "column": 24,
-      "stop_line": 98,
-      "stop_column": 34,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 105,
-      "column": 16,
-      "stop_line": 105,
-      "stop_column": 56,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 146,
-      "column": 24,
-      "stop_line": 146,
-      "stop_column": 34,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 176,
-      "column": 24,
-      "stop_line": 176,
-      "stop_column": 34,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
-      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 238,
-      "column": 20,
-      "stop_line": 238,
-      "stop_column": 55,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`in` is not supported between `str` and `Module`\n  Expected `__contains__` to be a callable, got `Module | Tensor`",
-      "concise_description": "`in` is not supported between `str` and `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 241,
-      "column": 16,
-      "stop_line": 241,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 241,
-      "column": 38,
-      "stop_line": 241,
-      "stop_column": 48,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Tensor`\n  Argument `Literal['loss_vfl']` is not assignable to parameter `indices` with type `EllipsisType | SupportsIndex | Tensor | _NestedSequence[EllipsisType | Tensor | bool | int | slice[Any, Any, Any] | None] | bool | int | slice[Any, Any, Any] | tuple[_Index, ...] | None` in function `torch._C.TensorBase.__getitem__`",
-      "concise_description": "Cannot index into `Tensor`",
-      "severity": "error"
-    },
-    {
-      "line": 242,
-      "column": 16,
-      "stop_line": 242,
-      "stop_column": 50,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 243,
-      "column": 16,
-      "stop_line": 243,
-      "stop_column": 50,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 244,
-      "column": 16,
-      "stop_line": 244,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 245,
-      "column": 16,
-      "stop_line": 245,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 246,
-      "column": 16,
-      "stop_line": 246,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
-      "concise_description": "Cannot index into `Module`",
-      "severity": "error"
-    },
-    {
-      "line": 401,
-      "column": 20,
-      "stop_line": 401,
-      "stop_column": 30,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
-      "severity": "error"
-    },
-    {
-      "line": 404,
-      "column": 28,
-      "stop_line": 404,
-      "stop_column": 41,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 405,
-      "column": 24,
-      "stop_line": 405,
-      "stop_column": 37,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 406,
-      "column": 28,
-      "stop_line": 406,
-      "stop_column": 41,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 409,
-      "column": 28,
-      "stop_line": 409,
-      "stop_column": 41,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 410,
-      "column": 24,
-      "stop_line": 410,
-      "stop_column": 37,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 411,
-      "column": 16,
-      "stop_line": 411,
-      "stop_column": 29,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 414,
-      "column": 28,
-      "stop_line": 414,
-      "stop_column": 41,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 415,
-      "column": 16,
-      "stop_line": 415,
-      "stop_column": 29,
-      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
-      "severity": "error"
-    },
-    {
-      "line": 136,
-      "column": 37,
-      "stop_line": 136,
-      "stop_column": 59,
-      "path": "tests/unit/backend/native/models/detection/test_dfine.py",
-      "code": -2,
-      "name": "implicit-import",
-      "description": "Module `torchvision.tv_tensors` exists, but was not imported explicitly. You are relying on other modules to load it.",
-      "concise_description": "Module `torchvision.tv_tensors` exists, but was not imported explicitly. You are relying on other modules to load it.",
-      "severity": "error"
-    },
-    {
-      "line": 27,
-      "column": 33,
-      "stop_line": 27,
-      "stop_column": 37,
-      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `None` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
-      "concise_description": "Argument `None` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
-      "severity": "error"
-    },
-    {
-      "line": 36,
-      "column": 26,
-      "stop_line": 36,
-      "stop_column": 81,
-      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[320], Literal[320]], ori_shape=tuple[Literal[320], Literal[320]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[320], Literal[320]], ori_shape=tuple[Literal[320], Literal[320]])",
-      "severity": "error"
-    },
-    {
-      "line": 37,
-      "column": 26,
-      "stop_line": 37,
-      "stop_column": 81,
-      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[1], img_shape=tuple[Literal[320], Literal[320]], ori_shape=tuple[Literal[320], Literal[320]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[1], img_shape=tuple[Literal[320], Literal[320]], ori_shape=tuple[Literal[320], Literal[320]])",
-      "severity": "error"
-    },
-    {
-      "line": 40,
-      "column": 20,
-      "stop_line": 49,
-      "stop_column": 14,
-      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 66,
-      "column": 35,
-      "stop_line": 70,
-      "stop_column": 10,
-      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `(*_: Unknown) -> tuple[Unknown, Unknown, Unknown]` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
-      "concise_description": "Argument `(*_: Unknown) -> tuple[Unknown, Unknown, Unknown]` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
-      "severity": "error"
-    },
-    {
-      "line": 101,
-      "column": 42,
-      "stop_line": 101,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `dict` has no attribute `data`",
-      "concise_description": "Object of class `dict` has no attribute `data`",
-      "severity": "error"
-    },
-    {
-      "line": 146,
-      "column": 9,
-      "stop_line": 146,
-      "stop_column": 29,
-      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `FunctionType` has no attribute `training`",
-      "concise_description": "Object of class `FunctionType` has no attribute `training`",
-      "severity": "error"
-    },
-    {
-      "line": 147,
-      "column": 21,
-      "stop_line": 147,
-      "stop_column": 22,
-      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
-      "severity": "error"
-    },
-    {
-      "line": 109,
-      "column": 21,
-      "stop_line": 109,
-      "stop_column": 22,
-      "path": "tests/unit/backend/native/models/detection/test_rtmdet.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
-      "severity": "error"
-    },
-    {
-      "line": 57,
-      "column": 16,
-      "stop_line": 57,
-      "stop_column": 61,
-      "path": "tests/unit/backend/native/models/detection/test_ssd.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `anchor_generator`",
-      "concise_description": "Object of class `Tensor` has no attribute `anchor_generator`",
-      "severity": "error"
-    },
-    {
-      "line": 58,
-      "column": 16,
-      "stop_line": 58,
-      "stop_column": 61,
-      "path": "tests/unit/backend/native/models/detection/test_ssd.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `anchor_generator`",
-      "concise_description": "Object of class `Tensor` has no attribute `anchor_generator`",
-      "severity": "error"
-    },
-    {
-      "line": 112,
-      "column": 25,
-      "stop_line": 112,
-      "stop_column": 26,
-      "path": "tests/unit/backend/native/models/detection/test_ssd.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
-      "severity": "error"
-    },
-    {
-      "line": 193,
-      "column": 21,
-      "stop_line": 193,
-      "stop_column": 22,
-      "path": "tests/unit/backend/native/models/detection/test_yolox.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
-      "severity": "error"
-    },
-    {
-      "line": 15,
-      "column": 21,
-      "stop_line": 15,
-      "stop_column": 29,
-      "path": "tests/unit/backend/native/models/detection/utils/prior_generators/test_custom_anchor_generator.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `tuple[Literal[16], Literal[32]]` is not assignable to parameter `strides` with type `list[int]` in function `otx.backend.native.models.detection.utils.prior_generators.anchor_generator.SSDAnchorGeneratorClustered.__init__`",
-      "concise_description": "Argument `tuple[Literal[16], Literal[32]]` is not assignable to parameter `strides` with type `list[int]` in function `otx.backend.native.models.detection.utils.prior_generators.anchor_generator.SSDAnchorGeneratorClustered.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 20,
-      "column": 33,
-      "stop_line": 24,
-      "stop_column": 10,
-      "path": "tests/unit/backend/native/models/instance_segmentation/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])",
-      "severity": "error"
-    },
-    {
-      "line": 25,
-      "column": 33,
-      "stop_line": 29,
-      "stop_column": 10,
-      "path": "tests/unit/backend/native/models/instance_segmentation/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])",
-      "severity": "error"
-    },
-    {
-      "line": 44,
-      "column": 18,
-      "stop_line": 44,
-      "stop_column": 73,
-      "path": "tests/unit/backend/native/models/instance_segmentation/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])",
-      "severity": "error"
-    },
-    {
-      "line": 45,
-      "column": 18,
-      "stop_line": 45,
-      "stop_column": 73,
-      "path": "tests/unit/backend/native/models/instance_segmentation/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[1], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[1], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])",
-      "severity": "error"
-    },
-    {
-      "line": 129,
-      "column": 9,
-      "stop_line": 129,
-      "stop_column": 20,
-      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "Cannot set item in `list[ndarray[tuple[int], dtype[Any]]]`\n  No matching overload found for function `list.__setitem__` called with arguments: (Literal[0], ndarray[tuple[int, ...], dtype[Any]])\n  Possible overloads:\n  (key: SupportsIndex, value: ndarray[tuple[int], dtype[Any]], /) -> None [closest match]\n  (key: slice[Any, Any, Any], value: Iterable[ndarray[tuple[int], dtype[Any]]], /) -> None",
-      "concise_description": "Cannot set item in `list[ndarray[tuple[int], dtype[Any]]]`",
-      "severity": "error"
-    },
-    {
-      "line": 130,
-      "column": 9,
-      "stop_line": 130,
-      "stop_column": 20,
-      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
-      "code": -2,
-      "name": "unsupported-operation",
-      "description": "Cannot set item in `list[ndarray[tuple[int], dtype[Any]]]`\n  No matching overload found for function `list.__setitem__` called with arguments: (Literal[1], ndarray[tuple[int, ...], dtype[Any]])\n  Possible overloads:\n  (key: SupportsIndex, value: ndarray[tuple[int], dtype[Any]], /) -> None [closest match]\n  (key: slice[Any, Any, Any], value: Iterable[ndarray[tuple[int], dtype[Any]]], /) -> None",
-      "concise_description": "Cannot set item in `list[ndarray[tuple[int], dtype[Any]]]`",
-      "severity": "error"
-    },
-    {
-      "line": 135,
-      "column": 26,
-      "stop_line": 135,
-      "stop_column": 73,
-      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Literal[0], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Literal[0], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])",
-      "severity": "error"
-    },
-    {
-      "line": 136,
-      "column": 26,
-      "stop_line": 136,
-      "stop_column": 73,
-      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Literal[1], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Literal[1], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])",
-      "severity": "error"
-    },
-    {
-      "line": 138,
-      "column": 20,
-      "stop_line": 138,
-      "stop_column": 58,
-      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 140,
-      "column": 19,
-      "stop_line": 140,
-      "stop_column": 71,
-      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `masks` with type `list[Mask] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `masks` with type `list[Mask] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 143,
-      "column": 55,
-      "stop_line": 143,
-      "stop_column": 56,
-      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `tuple[Tensor, Tensor, Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.instance_segmentation.heads.rtmdet_inst_head.RTMDetInstHead.prepare_loss_inputs`",
-      "concise_description": "Argument `tuple[Tensor, Tensor, Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.instance_segmentation.heads.rtmdet_inst_head.RTMDetInstHead.prepare_loss_inputs`",
-      "severity": "error"
-    },
-    {
-      "line": 27,
-      "column": 16,
-      "stop_line": 27,
-      "stop_column": 76,
-      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 31,
-      "column": 22,
-      "stop_line": 36,
-      "stop_column": 14,
-      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[480], Literal[480]], ori_shape=tuple[Literal[480], Literal[480]], ignored_labels=list[@_])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[480], Literal[480]], ori_shape=tuple[Literal[480], Literal[480]], ignored_labels=list[@_])",
-      "severity": "error"
-    },
-    {
-      "line": 45,
-      "column": 16,
-      "stop_line": 45,
-      "stop_column": 76,
-      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 49,
-      "column": 22,
-      "stop_line": 54,
-      "stop_column": 14,
-      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[480], Literal[480]], ori_shape=tuple[Literal[480], Literal[480]], ignored_labels=list[int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[480], Literal[480]], ori_shape=tuple[Literal[480], Literal[480]], ignored_labels=list[int])",
-      "severity": "error"
-    },
-    {
-      "line": 92,
-      "column": 24,
-      "stop_line": 95,
-      "stop_column": 14,
-      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DeltaXYWHBBoxCoder` is not assignable to parameter `bbox_coder` with type `Module` in function `otx.backend.native.models.instance_segmentation.losses.roi_loss.ROICriterion.__init__`",
-      "concise_description": "Argument `DeltaXYWHBBoxCoder` is not assignable to parameter `bbox_coder` with type `Module` in function `otx.backend.native.models.instance_segmentation.losses.roi_loss.ROICriterion.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 108,
-      "column": 13,
-      "stop_line": 108,
-      "stop_column": 56,
-      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `prepare_loss_inputs`",
-      "concise_description": "Object of class `Tensor` has no attribute `prepare_loss_inputs`",
-      "severity": "error"
-    },
-    {
-      "line": 132,
-      "column": 13,
-      "stop_line": 132,
-      "stop_column": 56,
-      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `prepare_loss_inputs`",
-      "concise_description": "Object of class `Tensor` has no attribute `prepare_loss_inputs`",
-      "severity": "error"
-    },
-    {
-      "line": 18,
-      "column": 33,
-      "stop_line": 18,
-      "stop_column": 53,
-      "path": "tests/unit/backend/native/models/keypoint_detection/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)",
-      "severity": "error"
-    },
-    {
-      "line": 19,
-      "column": 27,
-      "stop_line": 19,
-      "stop_column": 82,
-      "path": "tests/unit/backend/native/models/keypoint_detection/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[192], Literal[256]], ori_shape=tuple[Literal[192], Literal[256]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[192], Literal[256]], ori_shape=tuple[Literal[192], Literal[256]])",
-      "severity": "error"
-    },
-    {
-      "line": 20,
-      "column": 38,
-      "stop_line": 25,
-      "stop_column": 6,
-      "path": "tests/unit/backend/native/models/keypoint_detection/conftest.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (list[list[int]], format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[192], Literal[256]], dtype=dtype)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (list[list[int]], format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[192], Literal[256]], dtype=dtype)",
-      "severity": "error"
-    },
-    {
-      "line": 26,
-      "column": 31,
-      "stop_line": 26,
-      "stop_column": 86,
-      "path": "tests/unit/backend/native/models/keypoint_detection/heads/test_rtmcc_head.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[192], Literal[256]], ori_shape=tuple[Literal[192], Literal[256]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[192], Literal[256]], ori_shape=tuple[Literal[192], Literal[256]])",
-      "severity": "error"
-    },
-    {
-      "line": 31,
-      "column": 36,
-      "stop_line": 31,
-      "stop_column": 81,
-      "path": "tests/unit/backend/native/models/keypoint_detection/heads/test_rtmcc_head.py",
-      "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)",
-      "severity": "error"
-    },
-    {
-      "line": 110,
-      "column": 20,
-      "stop_line": 110,
-      "stop_column": 48,
-      "path": "tests/unit/backend/native/models/keypoint_detection/test_base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
-      "severity": "error"
-    },
-    {
-      "line": 110,
-      "column": 57,
-      "stop_line": 110,
-      "stop_column": 82,
-      "path": "tests/unit/backend/native/models/keypoint_detection/test_base.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
-      "severity": "error"
-    },
-    {
-      "line": 49,
-      "column": 13,
-      "stop_line": 49,
-      "stop_column": 24,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "not-callable",
-      "description": "Expected a callable, got `None`",
-      "concise_description": "Expected a callable, got `None`",
-      "severity": "error"
-    },
-    {
-      "line": 50,
-      "column": 16,
-      "stop_line": 50,
-      "stop_column": 27,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "not-callable",
-      "description": "Expected a callable, got `None`",
-      "concise_description": "Expected a callable, got `None`",
-      "severity": "error"
-    },
-    {
-      "line": 196,
-      "column": 13,
-      "stop_line": 196,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `assert_not_called`",
-      "concise_description": "Object of class `Tensor` has no attribute `assert_not_called`",
-      "severity": "error"
-    },
-    {
-      "line": 213,
-      "column": 9,
-      "stop_line": 213,
-      "stop_column": 33,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `is_init`",
-      "concise_description": "Object of class `NoneType` has no attribute `is_init`",
-      "severity": "error"
-    },
-    {
-      "line": 214,
-      "column": 29,
-      "stop_line": 214,
-      "stop_column": 33,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Literal[True]` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
-      "concise_description": "Argument `Literal[True]` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
-      "severity": "error"
-    },
-    {
-      "line": 216,
-      "column": 9,
-      "stop_line": 216,
-      "stop_column": 38,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `init_weights`",
-      "concise_description": "Object of class `NoneType` has no attribute `init_weights`",
-      "severity": "error"
-    },
-    {
-      "line": 217,
-      "column": 9,
-      "stop_line": 217,
-      "stop_column": 38,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `init_weights`",
-      "concise_description": "Object of class `NoneType` has no attribute `init_weights`",
-      "severity": "error"
-    },
-    {
-      "line": 218,
-      "column": 9,
-      "stop_line": 218,
-      "stop_column": 38,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `init_weights`",
-      "concise_description": "Object of class `NoneType` has no attribute `init_weights`",
-      "severity": "error"
-    },
-    {
-      "line": 219,
-      "column": 9,
-      "stop_line": 219,
-      "stop_column": 38,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `init_weights`",
-      "concise_description": "Object of class `NoneType` has no attribute `init_weights`",
-      "severity": "error"
-    },
-    {
-      "line": 220,
-      "column": 9,
-      "stop_line": 220,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `assert_not_called`",
-      "concise_description": "Object of class `Tensor` has no attribute `assert_not_called`",
-      "severity": "error"
-    },
-    {
-      "line": 231,
-      "column": 28,
-      "stop_line": 231,
-      "stop_column": 54,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 231,
-      "column": 67,
-      "stop_line": 231,
-      "stop_column": 93,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 232,
-      "column": 28,
-      "stop_line": 232,
-      "stop_column": 52,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 232,
-      "column": 65,
-      "stop_line": 232,
-      "stop_column": 89,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 233,
-      "column": 28,
-      "stop_line": 233,
-      "stop_column": 54,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 233,
-      "column": 67,
-      "stop_line": 233,
-      "stop_column": 93,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 234,
-      "column": 28,
-      "stop_line": 234,
-      "stop_column": 52,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 234,
-      "column": 65,
-      "stop_line": 234,
-      "stop_column": 89,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 245,
-      "column": 28,
-      "stop_line": 245,
-      "stop_column": 54,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 245,
-      "column": 67,
-      "stop_line": 245,
-      "stop_column": 93,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 246,
-      "column": 28,
-      "stop_line": 246,
-      "stop_column": 52,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 246,
-      "column": 65,
-      "stop_line": 246,
-      "stop_column": 89,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 247,
-      "column": 28,
-      "stop_line": 247,
-      "stop_column": 54,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 247,
-      "column": 67,
-      "stop_line": 247,
-      "stop_column": 93,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 248,
-      "column": 28,
-      "stop_line": 248,
-      "stop_column": 52,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 248,
-      "column": 65,
-      "stop_line": 248,
-      "stop_column": 89,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 260,
-      "column": 13,
-      "stop_line": 260,
-      "stop_column": 51,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 261,
-      "column": 24,
-      "stop_line": 261,
-      "stop_column": 62,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 264,
-      "column": 13,
-      "stop_line": 264,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 265,
-      "column": 24,
-      "stop_line": 265,
-      "stop_column": 60,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 268,
-      "column": 13,
-      "stop_line": 268,
-      "stop_column": 51,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 269,
-      "column": 24,
-      "stop_line": 269,
-      "stop_column": 62,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 272,
-      "column": 13,
-      "stop_line": 272,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 273,
-      "column": 24,
-      "stop_line": 273,
-      "stop_column": 60,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 286,
-      "column": 13,
-      "stop_line": 286,
-      "stop_column": 51,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 287,
-      "column": 24,
-      "stop_line": 287,
-      "stop_column": 62,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 290,
-      "column": 13,
-      "stop_line": 290,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 47,
+      "column": 16,
+      "stop_line": 47,
+      "stop_column": 40,
+      "path": "tests/unit/backend/native/models/detection/test_base.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
+      "name": "bad-return",
+      "description": "Returned type `DictConfig | ListConfig` is not assignable to declared return type `DictConfig`",
+      "concise_description": "Returned type `DictConfig | ListConfig` is not assignable to declared return type `DictConfig`",
       "severity": "error"
     },
     {
-      "line": 291,
+      "line": 32,
       "column": 24,
-      "stop_line": 291,
-      "stop_column": 60,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "stop_line": 32,
+      "stop_column": 34,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
+      "name": "bad-argument-type",
+      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
+      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
       "severity": "error"
     },
     {
-      "line": 294,
-      "column": 13,
-      "stop_line": 294,
-      "stop_column": 51,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 72,
+      "column": 20,
+      "stop_line": 72,
+      "stop_column": 57,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
+      "concise_description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
       "severity": "error"
     },
     {
-      "line": 295,
+      "line": 76,
       "column": 24,
-      "stop_line": 295,
-      "stop_column": 62,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 298,
-      "column": 13,
-      "stop_line": 298,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "stop_line": 76,
+      "stop_column": 61,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
+      "concise_description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
       "severity": "error"
     },
     {
-      "line": 299,
+      "line": 78,
       "column": 24,
-      "stop_line": 299,
-      "stop_column": 60,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
-      "severity": "error"
-    },
-    {
-      "line": 311,
-      "column": 28,
-      "stop_line": 311,
-      "stop_column": 54,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 311,
-      "column": 67,
-      "stop_line": 311,
-      "stop_column": 93,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
-      "severity": "error"
-    },
-    {
-      "line": 312,
-      "column": 28,
-      "stop_line": 312,
-      "stop_column": 52,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "stop_line": 78,
+      "stop_column": 61,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
+      "concise_description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
       "severity": "error"
     },
     {
-      "line": 312,
-      "column": 65,
-      "stop_line": 312,
-      "stop_column": 89,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 93,
+      "column": 24,
+      "stop_line": 93,
+      "stop_column": 34,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
+      "name": "bad-argument-type",
+      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
+      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
       "severity": "error"
     },
     {
-      "line": 313,
-      "column": 28,
-      "stop_line": 313,
-      "stop_column": 54,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 125,
+      "column": 24,
+      "stop_line": 125,
+      "stop_column": 34,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
+      "name": "bad-argument-type",
+      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
+      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
       "severity": "error"
     },
     {
-      "line": 313,
-      "column": 67,
-      "stop_line": 313,
-      "stop_column": 93,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 149,
+      "column": 24,
+      "stop_line": 149,
+      "stop_column": 34,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
+      "name": "bad-argument-type",
+      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
+      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deim_dfine_hgnetv2_l', 'deim_dfine_hgnetv2_m', 'deim_dfine_hgnetv2_n', 'deim_dfine_hgnetv2_s', 'deim_dfine_hgnetv2_x']` in function `otx.backend.native.models.detection.deim.DEIMDFine.__init__`",
       "severity": "error"
     },
     {
-      "line": 314,
-      "column": 28,
-      "stop_line": 314,
-      "stop_column": 52,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 206,
+      "column": 20,
+      "stop_line": 206,
+      "stop_column": 55,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
+      "name": "unsupported-operation",
+      "description": "`in` is not supported between `str` and `Module`\n  Expected `__contains__` to be a callable, got `Module | Tensor`",
+      "concise_description": "`in` is not supported between `str` and `Module`",
       "severity": "error"
     },
     {
-      "line": 314,
-      "column": 65,
-      "stop_line": 314,
-      "stop_column": 89,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 209,
+      "column": 16,
+      "stop_line": 209,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 325,
-      "column": 28,
-      "stop_line": 325,
-      "stop_column": 54,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 209,
+      "column": 38,
+      "stop_line": 209,
+      "stop_column": 48,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
+      "name": "bad-index",
+      "description": "Cannot index into `Tensor`\n  Argument `Literal['loss_vfl']` is not assignable to parameter `indices` with type `EllipsisType | SupportsIndex | Tensor | _NestedSequence[EllipsisType | Tensor | bool | int | slice[Any, Any, Any] | None] | bool | int | slice[Any, Any, Any] | tuple[_Index, ...] | None` in function `torch._C.TensorBase.__getitem__`",
+      "concise_description": "Cannot index into `Tensor`",
       "severity": "error"
     },
     {
-      "line": 325,
-      "column": 67,
-      "stop_line": 325,
-      "stop_column": 93,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 210,
+      "column": 16,
+      "stop_line": 210,
+      "stop_column": 50,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 326,
-      "column": 28,
-      "stop_line": 326,
-      "stop_column": 52,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 211,
+      "column": 16,
+      "stop_line": 211,
+      "stop_column": 50,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 326,
-      "column": 65,
-      "stop_line": 326,
-      "stop_column": 89,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 212,
+      "column": 16,
+      "stop_line": 212,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 327,
-      "column": 28,
-      "stop_line": 327,
-      "stop_column": 54,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 213,
+      "column": 16,
+      "stop_line": 213,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 327,
-      "column": 67,
-      "stop_line": 327,
-      "stop_column": 93,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 214,
+      "column": 16,
+      "stop_line": 214,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/test_deim.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `weight`",
-      "concise_description": "Object of class `Tensor` has no attribute `weight`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 328,
-      "column": 28,
-      "stop_line": 328,
-      "stop_column": 52,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 33,
+      "column": 24,
+      "stop_line": 33,
+      "stop_column": 34,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
+      "name": "bad-argument-type",
+      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
+      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
       "severity": "error"
     },
     {
-      "line": 328,
-      "column": 65,
-      "stop_line": 328,
-      "stop_column": 89,
-      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "line": 81,
+      "column": 20,
+      "stop_line": 81,
+      "stop_column": 57,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `bias`",
-      "concise_description": "Object of class `Tensor` has no attribute `bias`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
+      "concise_description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
       "severity": "error"
     },
     {
-      "line": 123,
-      "column": 12,
-      "stop_line": 123,
+      "line": 98,
+      "column": 24,
+      "stop_line": 98,
       "stop_column": 34,
-      "path": "tests/unit/backend/native/models/modules/test_conv_module.py",
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `str` has no attribute `args`",
-      "concise_description": "Object of class `str` has no attribute `args`",
+      "name": "bad-argument-type",
+      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
+      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
       "severity": "error"
     },
     {
-      "line": 129,
-      "column": 12,
-      "stop_line": 129,
+      "line": 105,
+      "column": 16,
+      "stop_line": 105,
+      "stop_column": 56,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
+      "code": -2,
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
+      "severity": "error"
+    },
+    {
+      "line": 146,
+      "column": 24,
+      "stop_line": 146,
       "stop_column": 34,
-      "path": "tests/unit/backend/native/models/modules/test_conv_module.py",
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `str` has no attribute `args`",
-      "concise_description": "Object of class `str` has no attribute `args`",
+      "name": "bad-argument-type",
+      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
+      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
       "severity": "error"
     },
     {
-      "line": 24,
-      "column": 66,
-      "stop_line": 24,
-      "stop_column": 74,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 176,
+      "column": 24,
+      "stop_line": 176,
+      "stop_column": 34,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
+      "concise_description": "Argument `str` is not assignable to parameter `model_name` with type `Literal['deimv2_l', 'deimv2_m', 'deimv2_s', 'deimv2_x']` in function `otx.backend.native.models.detection.deimv2.DEIMV2.__init__`",
       "severity": "error"
     },
     {
-      "line": 24,
-      "column": 66,
-      "stop_line": 24,
-      "stop_column": 74,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 238,
+      "column": 20,
+      "stop_line": 238,
+      "stop_column": 55,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "unsupported-operation",
+      "description": "`in` is not supported between `str` and `Module`\n  Expected `__contains__` to be a callable, got `Module | Tensor`",
+      "concise_description": "`in` is not supported between `str` and `Module`",
       "severity": "error"
     },
     {
-      "line": 24,
-      "column": 66,
-      "stop_line": 24,
-      "stop_column": 74,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 241,
+      "column": 16,
+      "stop_line": 241,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 41,
-      "column": 75,
-      "stop_line": 41,
-      "stop_column": 83,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 241,
+      "column": 38,
+      "stop_line": 241,
+      "stop_column": 48,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "bad-index",
+      "description": "Cannot index into `Tensor`\n  Argument `Literal['loss_vfl']` is not assignable to parameter `indices` with type `EllipsisType | SupportsIndex | Tensor | _NestedSequence[EllipsisType | Tensor | bool | int | slice[Any, Any, Any] | None] | bool | int | slice[Any, Any, Any] | tuple[_Index, ...] | None` in function `torch._C.TensorBase.__getitem__`",
+      "concise_description": "Cannot index into `Tensor`",
       "severity": "error"
     },
     {
-      "line": 41,
-      "column": 75,
-      "stop_line": 41,
-      "stop_column": 83,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 242,
+      "column": 16,
+      "stop_line": 242,
+      "stop_column": 50,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 41,
-      "column": 75,
-      "stop_line": 41,
-      "stop_column": 83,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 243,
+      "column": 16,
+      "stop_line": 243,
+      "stop_column": 50,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 59,
-      "column": 17,
-      "stop_line": 59,
-      "stop_column": 33,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 244,
+      "column": 16,
+      "stop_line": 244,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `(normalization: ((...) -> Module) | Module | tuple[str, Module] | None, num_features: int, postfix: int | str = '', layer_name: str | None = None, requires_grad: bool = True, eps: float = ..., **kwargs: Unknown) -> tuple[str, Module]` is not assignable to parameter `func` with type `(...) -> Module` in function `functools.partial.__new__`",
-      "concise_description": "Argument `(normalization: ((...) -> Module) | Module | tuple[str, Module] | None, num_features: int, postfix: int | str = '', layer_name: str | None = None, requires_grad: bool = True, eps: float = ..., **kwargs: Unknown) -> tuple[str, Module]` is not assignable to parameter `func` with type `(...) -> Module` in function `functools.partial.__new__`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 61,
-      "column": 9,
-      "stop_line": 61,
-      "stop_column": 17,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 245,
+      "column": 16,
+      "stop_line": 245,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 61,
-      "column": 9,
-      "stop_line": 61,
-      "stop_column": 17,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 246,
+      "column": 16,
+      "stop_line": 246,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "bad-index",
+      "description": "Cannot index into `Module`\n  Expected `__getitem__` to be a callable, got `Module | Tensor`",
+      "concise_description": "Cannot index into `Module`",
       "severity": "error"
     },
     {
-      "line": 61,
-      "column": 9,
-      "stop_line": 61,
-      "stop_column": 17,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 401,
+      "column": 20,
+      "stop_line": 401,
+      "stop_column": 30,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
+      "concise_description": "Argument `list[dict[Unknown, Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
       "severity": "error"
     },
     {
-      "line": 80,
-      "column": 57,
-      "stop_line": 80,
-      "stop_column": 65,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 404,
+      "column": 28,
+      "stop_line": 404,
+      "stop_column": 41,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 80,
-      "column": 57,
-      "stop_line": 80,
-      "stop_column": 65,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 405,
+      "column": 24,
+      "stop_line": 405,
+      "stop_column": 37,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 80,
-      "column": 57,
-      "stop_line": 80,
-      "stop_column": 65,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 406,
+      "column": 28,
+      "stop_line": 406,
+      "stop_column": 41,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 82,
-      "column": 9,
-      "stop_line": 82,
-      "stop_column": 17,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 409,
+      "column": 28,
+      "stop_line": 409,
+      "stop_column": 41,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 82,
-      "column": 9,
-      "stop_line": 82,
-      "stop_column": 17,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 410,
+      "column": 24,
+      "stop_line": 410,
+      "stop_column": 37,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 82,
-      "column": 9,
-      "stop_line": 82,
-      "stop_column": 17,
-      "path": "tests/unit/backend/native/models/modules/test_norm.py",
+      "line": 411,
+      "column": 16,
+      "stop_line": 411,
+      "stop_column": 29,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
-      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 24,
-      "column": 29,
-      "stop_line": 24,
-      "stop_column": 33,
-      "path": "tests/unit/backend/native/models/modules/test_padding.py",
+      "line": 414,
+      "column": 28,
+      "stop_line": 414,
+      "stop_column": 41,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `None` is not assignable to parameter `cfg` with type `dict[Unknown, Unknown]` in function `otx.backend.native.models.modules.padding.build_padding_layer`",
-      "concise_description": "Argument `None` is not assignable to parameter `cfg` with type `dict[Unknown, Unknown]` in function `otx.backend.native.models.modules.padding.build_padding_layer`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 73,
-      "column": 92,
-      "stop_line": 73,
-      "stop_column": 93,
-      "path": "tests/unit/backend/native/models/modules/test_transformer.py",
+      "line": 415,
+      "column": 16,
+      "stop_line": 415,
+      "stop_column": 29,
+      "path": "tests/unit/backend/native/models/detection/test_deimv2.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Literal[1]` is not assignable to parameter `padding` with type `str` in function `otx.backend.native.models.modules.transformer.AdaptivePadding.__init__`",
-      "concise_description": "Argument `Literal[1]` is not assignable to parameter `padding` with type `str` in function `otx.backend.native.models.modules.transformer.AdaptivePadding.__init__`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 27,
-      "column": 40,
-      "stop_line": 27,
-      "stop_column": 87,
-      "path": "tests/unit/backend/native/models/segmentation/heads/test_class_incremental.py",
+      "line": 136,
+      "column": 37,
+      "stop_line": 136,
+      "stop_column": 59,
+      "path": "tests/unit/backend/native/models/detection/test_dfine.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Literal[0], tuple[Literal[128], Literal[128]], tuple[Literal[128], Literal[128]], ignored_labels=list[int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Literal[0], tuple[Literal[128], Literal[128]], tuple[Literal[128], Literal[128]], ignored_labels=list[int])",
+      "name": "implicit-import",
+      "description": "Module `torchvision.tv_tensors` exists, but was not imported explicitly. You are relying on other modules to load it.",
+      "concise_description": "Module `torchvision.tv_tensors` exists, but was not imported explicitly. You are relying on other modules to load it.",
       "severity": "error"
     },
     {
-      "line": 16,
-      "column": 35,
-      "stop_line": 16,
-      "stop_column": 36,
-      "path": "tests/unit/backend/native/models/segmentation/segmentors/test_base_model.py",
+      "line": 27,
+      "column": 33,
+      "stop_line": 27,
+      "stop_column": 37,
+      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Literal[3]` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
-      "concise_description": "Argument `Literal[3]` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
+      "description": "Argument `None` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
+      "concise_description": "Argument `None` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
       "severity": "error"
     },
     {
-      "line": 33,
-      "column": 31,
-      "stop_line": 33,
-      "stop_column": 86,
-      "path": "tests/unit/backend/native/models/segmentation/segmentors/test_base_model.py",
+      "line": 36,
+      "column": 26,
+      "stop_line": 36,
+      "stop_column": 81,
+      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
       "code": -2,
       "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[256], Literal[256]], img_idx=Literal[0], ori_shape=tuple[Literal[256], Literal[256]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[256], Literal[256]], img_idx=Literal[0], ori_shape=tuple[Literal[256], Literal[256]])",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[320], Literal[320]], ori_shape=tuple[Literal[320], Literal[320]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[320], Literal[320]], ori_shape=tuple[Literal[320], Literal[320]])",
       "severity": "error"
     },
     {
-      "line": 55,
-      "column": 31,
-      "stop_line": 55,
-      "stop_column": 86,
-      "path": "tests/unit/backend/native/models/segmentation/segmentors/test_base_model.py",
+      "line": 37,
+      "column": 26,
+      "stop_line": 37,
+      "stop_column": 81,
+      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
       "code": -2,
       "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[256], Literal[256]], img_idx=Literal[0], ori_shape=tuple[Literal[256], Literal[256]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[256], Literal[256]], img_idx=Literal[0], ori_shape=tuple[Literal[256], Literal[256]])",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[1], img_shape=tuple[Literal[320], Literal[320]], ori_shape=tuple[Literal[320], Literal[320]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[1], img_shape=tuple[Literal[320], Literal[320]], ori_shape=tuple[Literal[320], Literal[320]])",
       "severity": "error"
     },
     {
-      "line": 62,
-      "column": 31,
-      "stop_line": 62,
-      "stop_column": 109,
-      "path": "tests/unit/backend/native/models/segmentation/segmentors/test_base_model.py",
+      "line": 40,
+      "column": 20,
+      "stop_line": 49,
+      "stop_column": 14,
+      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[256], Literal[256]], img_idx=Literal[0], ignored_labels=list[int], ori_shape=tuple[Literal[256], Literal[256]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[256], Literal[256]], img_idx=Literal[0], ignored_labels=list[int], ori_shape=tuple[Literal[256], Literal[256]])",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 37,
-      "column": 19,
-      "stop_line": 37,
-      "stop_column": 71,
-      "path": "tests/unit/backend/native/models/segmentation/test_base.py",
+      "line": 66,
+      "column": 35,
+      "stop_line": 70,
+      "stop_column": 10,
+      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `masks` with type `list[Mask] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `masks` with type `list[Mask] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
+      "description": "Argument `(*_: Unknown) -> tuple[Unknown, Unknown, Unknown]` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
+      "concise_description": "Argument `(*_: Unknown) -> tuple[Unknown, Unknown, Unknown]` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
       "severity": "error"
     },
     {
-      "line": 116,
-      "column": 20,
-      "stop_line": 116,
-      "stop_column": 45,
-      "path": "tests/unit/backend/native/models/segmentation/test_base.py",
+      "line": 101,
+      "column": 42,
+      "stop_line": 101,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
+      "name": "missing-attribute",
+      "description": "Object of class `dict` has no attribute `data`",
+      "concise_description": "Object of class `dict` has no attribute `data`",
       "severity": "error"
     },
     {
-      "line": 117,
-      "column": 16,
-      "stop_line": 117,
-      "stop_column": 47,
-      "path": "tests/unit/backend/native/models/segmentation/test_base.py",
+      "line": 146,
+      "column": 9,
+      "stop_line": 146,
+      "stop_column": 29,
+      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `list` has no attribute `shape`",
-      "concise_description": "Object of class `list` has no attribute `shape`",
+      "description": "Object of class `FunctionType` has no attribute `training`",
+      "concise_description": "Object of class `FunctionType` has no attribute `training`",
       "severity": "error"
     },
     {
-      "line": 58,
+      "line": 147,
       "column": 21,
-      "stop_line": 58,
+      "stop_line": 147,
       "stop_column": 22,
-      "path": "tests/unit/backend/native/models/segmentation/test_dino_v2_seg.py",
+      "path": "tests/unit/backend/native/models/detection/test_rtdetr.py",
       "code": -2,
       "name": "bad-argument-type",
       "description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
@@ -22669,11 +19165,11 @@
       "severity": "error"
     },
     {
-      "line": 66,
+      "line": 109,
       "column": 21,
-      "stop_line": 66,
+      "stop_line": 109,
       "stop_column": 22,
-      "path": "tests/unit/backend/native/models/segmentation/test_segnext.py",
+      "path": "tests/unit/backend/native/models/detection/test_rtmdet.py",
       "code": -2,
       "name": "bad-argument-type",
       "description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
@@ -22681,2187 +19177,2211 @@
       "severity": "error"
     },
     {
-      "line": 54,
-      "column": 41,
-      "stop_line": 54,
-      "stop_column": 46,
-      "path": "tests/unit/backend/native/models/test_base.py",
+      "line": 55,
+      "column": 16,
+      "stop_line": 55,
+      "stop_column": 61,
+      "path": "tests/unit/backend/native/models/detection/test_ssd.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `dict[str, Tensor]` is not assignable to parameter `batch` with type `OTXSampleBatch` in function `otx.backend.native.models.base.OTXModel.training_step`",
-      "concise_description": "Argument `dict[str, Tensor]` is not assignable to parameter `batch` with type `OTXSampleBatch` in function `otx.backend.native.models.base.OTXModel.training_step`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `anchor_generator`",
+      "concise_description": "Object of class `Tensor` has no attribute `anchor_generator`",
       "severity": "error"
     },
     {
-      "line": 128,
-      "column": 35,
-      "stop_line": 128,
-      "stop_column": 110,
-      "path": "tests/unit/backend/native/models/test_base.py",
+      "line": 56,
+      "column": 16,
+      "stop_line": 56,
+      "stop_column": 61,
+      "path": "tests/unit/backend/native/models/detection/test_ssd.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `dict[str, tuple[float, float, float] | tuple[int, int]]` is not assignable to parameter `data_input_params` with type `DataInputParams | None` in function `otx.backend.native.models.classification.multiclass_models.base.OTXMulticlassClsModel.__init__`",
-      "concise_description": "Argument `dict[str, tuple[float, float, float] | tuple[int, int]]` is not assignable to parameter `data_input_params` with type `DataInputParams | None` in function `otx.backend.native.models.classification.multiclass_models.base.OTXMulticlassClsModel.__init__`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `anchor_generator`",
+      "concise_description": "Object of class `Tensor` has no attribute `anchor_generator`",
       "severity": "error"
     },
     {
-      "line": 140,
-      "column": 47,
-      "stop_line": 140,
-      "stop_column": 50,
-      "path": "tests/unit/backend/native/models/test_base.py",
+      "line": 110,
+      "column": 25,
+      "stop_line": 110,
+      "stop_column": 26,
+      "path": "tests/unit/backend/native/models/detection/test_ssd.py",
       "code": -2,
-      "name": "bad-typed-dict-key",
-      "description": "`Literal[255]` is not assignable to TypedDict key `ignore_index` with type `list[list[str]] | list[str]`",
-      "concise_description": "`Literal[255]` is not assignable to TypedDict key `ignore_index` with type `list[list[str]] | list[str]`",
+      "name": "bad-argument-type",
+      "description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
+      "concise_description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
       "severity": "error"
     },
     {
-      "line": 150,
-      "column": 35,
-      "stop_line": 150,
-      "stop_column": 110,
-      "path": "tests/unit/backend/native/models/test_base.py",
+      "line": 193,
+      "column": 21,
+      "stop_line": 193,
+      "stop_column": 22,
+      "path": "tests/unit/backend/native/models/detection/test_yolox.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `dict[str, tuple[float, float, float] | tuple[int, int]]` is not assignable to parameter `data_input_params` with type `DataInputParams | None` in function `otx.backend.native.models.segmentation.base.OTXSegmentationModel.__init__`",
-      "concise_description": "Argument `dict[str, tuple[float, float, float] | tuple[int, int]]` is not assignable to parameter `data_input_params` with type `DataInputParams | None` in function `otx.backend.native.models.segmentation.base.OTXSegmentationModel.__init__`",
+      "description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
+      "concise_description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
       "severity": "error"
     },
     {
-      "line": 181,
-      "column": 55,
-      "stop_line": 181,
-      "stop_column": 74,
-      "path": "tests/unit/backend/native/models/test_base.py",
+      "line": 15,
+      "column": 21,
+      "stop_line": 15,
+      "stop_column": 29,
+      "path": "tests/unit/backend/native/models/detection/utils/prior_generators/test_custom_anchor_generator.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `LabelCategories` is not assignable to parameter `dm_label_categories` with type `HierarchicalLabelCategories` in function `otx.types.label.HLabelInfo.from_dm_label_groups`",
-      "concise_description": "Argument `LabelCategories` is not assignable to parameter `dm_label_categories` with type `HierarchicalLabelCategories` in function `otx.types.label.HLabelInfo.from_dm_label_groups`",
+      "description": "Argument `tuple[Literal[16], Literal[32]]` is not assignable to parameter `strides` with type `list[int]` in function `otx.backend.native.models.detection.utils.prior_generators.anchor_generator.SSDAnchorGeneratorClustered.__init__`",
+      "concise_description": "Argument `tuple[Literal[16], Literal[32]]` is not assignable to parameter `strides` with type `list[int]` in function `otx.backend.native.models.detection.utils.prior_generators.anchor_generator.SSDAnchorGeneratorClustered.__init__`",
       "severity": "error"
     },
     {
-      "line": 186,
-      "column": 17,
-      "stop_line": 186,
-      "stop_column": 39,
-      "path": "tests/unit/backend/native/models/test_base.py",
+      "line": 20,
+      "column": 33,
+      "stop_line": 24,
+      "stop_column": 10,
+      "path": "tests/unit/backend/native/models/instance_segmentation/conftest.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `dict[str, Any]` is not assignable to parameter `label_info` with type `HLabelInfo` in function `otx.backend.native.models.classification.hlabel_models.base.OTXHlabelClsModel.__init__`",
-      "concise_description": "Argument `dict[str, Any]` is not assignable to parameter `label_info` with type `HLabelInfo` in function `otx.backend.native.models.classification.hlabel_models.base.OTXHlabelClsModel.__init__`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])",
       "severity": "error"
     },
     {
-      "line": 187,
-      "column": 35,
-      "stop_line": 187,
-      "stop_column": 110,
-      "path": "tests/unit/backend/native/models/test_base.py",
+      "line": 25,
+      "column": 33,
+      "stop_line": 29,
+      "stop_column": 10,
+      "path": "tests/unit/backend/native/models/instance_segmentation/conftest.py",
+      "code": -2,
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[640], Literal[640]])",
+      "severity": "error"
+    },
+    {
+      "line": 44,
+      "column": 18,
+      "stop_line": 44,
+      "stop_column": 73,
+      "path": "tests/unit/backend/native/models/instance_segmentation/conftest.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `dict[str, tuple[float, float, float] | tuple[int, int]]` is not assignable to parameter `data_input_params` with type `DataInputParams | None` in function `otx.backend.native.models.classification.hlabel_models.base.OTXHlabelClsModel.__init__`",
-      "concise_description": "Argument `dict[str, tuple[float, float, float] | tuple[int, int]]` is not assignable to parameter `data_input_params` with type `DataInputParams | None` in function `otx.backend.native.models.classification.hlabel_models.base.OTXHlabelClsModel.__init__`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])",
       "severity": "error"
     },
     {
-      "line": 226,
-      "column": 73,
-      "stop_line": 226,
-      "stop_column": 77,
-      "path": "tests/unit/backend/native/models/test_base.py",
+      "line": 45,
+      "column": 18,
+      "stop_line": 45,
+      "stop_column": 73,
+      "path": "tests/unit/backend/native/models/instance_segmentation/conftest.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `None` is not assignable to parameter `metric` with type `Tensor` in function `otx.backend.native.models.base.OTXModel.lr_scheduler_step`",
-      "concise_description": "Argument `None` is not assignable to parameter `metric` with type `Tensor` in function `otx.backend.native.models.base.OTXModel.lr_scheduler_step`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[1], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[1], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])",
       "severity": "error"
     },
     {
-      "line": 237,
-      "column": 73,
-      "stop_line": 237,
-      "stop_column": 77,
-      "path": "tests/unit/backend/native/models/test_base.py",
+      "line": 129,
+      "column": 9,
+      "stop_line": 129,
+      "stop_column": 20,
+      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `None` is not assignable to parameter `metric` with type `Tensor` in function `otx.backend.native.models.base.OTXModel.lr_scheduler_step`",
-      "concise_description": "Argument `None` is not assignable to parameter `metric` with type `Tensor` in function `otx.backend.native.models.base.OTXModel.lr_scheduler_step`",
+      "name": "unsupported-operation",
+      "description": "Cannot set item in `list[ndarray[tuple[int], dtype[Any]]]`\n  No matching overload found for function `list.__setitem__` called with arguments: (Literal[0], ndarray[tuple[int, ...], dtype[Any]])\n  Possible overloads:\n  (key: SupportsIndex, value: ndarray[tuple[int], dtype[Any]], /) -> None [closest match]\n  (key: slice[Any, Any, Any], value: Iterable[ndarray[tuple[int], dtype[Any]]], /) -> None",
+      "concise_description": "Cannot set item in `list[ndarray[tuple[int], dtype[Any]]]`",
       "severity": "error"
     },
     {
-      "line": 21,
-      "column": 42,
-      "stop_line": 21,
-      "stop_column": 46,
-      "path": "tests/unit/backend/native/models/utils/test_segmentation.py",
+      "line": 130,
+      "column": 9,
+      "stop_line": 130,
+      "stop_column": 20,
+      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `None` is not assignable to parameter `ctx` with type `(...) -> Unknown` in function `otx.backend.native.models.segmentation.modules.blocks.OnnxLpNormalization.forward`",
-      "concise_description": "Argument `None` is not assignable to parameter `ctx` with type `(...) -> Unknown` in function `otx.backend.native.models.segmentation.modules.blocks.OnnxLpNormalization.forward`",
+      "name": "unsupported-operation",
+      "description": "Cannot set item in `list[ndarray[tuple[int], dtype[Any]]]`\n  No matching overload found for function `list.__setitem__` called with arguments: (Literal[1], ndarray[tuple[int, ...], dtype[Any]])\n  Possible overloads:\n  (key: SupportsIndex, value: ndarray[tuple[int], dtype[Any]], /) -> None [closest match]\n  (key: slice[Any, Any, Any], value: Iterable[ndarray[tuple[int], dtype[Any]]], /) -> None",
+      "concise_description": "Cannot set item in `list[ndarray[tuple[int], dtype[Any]]]`",
       "severity": "error"
     },
     {
-      "line": 39,
-      "column": 47,
-      "stop_line": 39,
-      "stop_column": 64,
-      "path": "tests/unit/backend/native/models/utils/test_segmentation.py",
+      "line": 135,
+      "column": 26,
+      "stop_line": 135,
+      "stop_column": 73,
+      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `Tensor` in function `otx.backend.native.models.segmentation.modules.aggregators.IterativeAggregator.forward`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `Tensor` in function `otx.backend.native.models.segmentation.modules.aggregators.IterativeAggregator.forward`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Literal[0], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Literal[0], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])",
       "severity": "error"
     },
     {
-      "line": 42,
-      "column": 16,
-      "stop_line": 42,
-      "stop_column": 37,
-      "path": "tests/unit/backend/native/optimizers/test_callable.py",
+      "line": 136,
+      "column": 26,
+      "stop_line": 136,
+      "stop_column": 73,
+      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `lr`",
-      "concise_description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `lr`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Literal[1], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Literal[1], img_shape=tuple[Literal[640], Literal[640]], ori_shape=tuple[Literal[640], Literal[640]])",
       "severity": "error"
     },
     {
-      "line": 43,
-      "column": 16,
-      "stop_line": 43,
-      "stop_column": 43,
-      "path": "tests/unit/backend/native/optimizers/test_callable.py",
+      "line": 138,
+      "column": 20,
+      "stop_line": 138,
+      "stop_column": 58,
+      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `momentum`",
-      "concise_description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `momentum`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 44,
-      "column": 16,
-      "stop_line": 44,
-      "stop_column": 47,
-      "path": "tests/unit/backend/native/optimizers/test_callable.py",
+      "line": 140,
+      "column": 19,
+      "stop_line": 140,
+      "stop_column": 71,
+      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `weight_decay`",
-      "concise_description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `weight_decay`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `masks` with type `list[Mask] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `masks` with type `list[Mask] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 58,
-      "column": 16,
-      "stop_line": 58,
-      "stop_column": 37,
-      "path": "tests/unit/backend/native/optimizers/test_callable.py",
+      "line": 143,
+      "column": 55,
+      "stop_line": 143,
+      "stop_column": 56,
+      "path": "tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `lr`",
-      "concise_description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `lr`",
+      "name": "bad-argument-type",
+      "description": "Argument `tuple[Tensor, Tensor, Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.instance_segmentation.heads.rtmdet_inst_head.RTMDetInstHead.prepare_loss_inputs`",
+      "concise_description": "Argument `tuple[Tensor, Tensor, Tensor]` is not assignable to parameter `x` with type `tuple[Tensor]` in function `otx.backend.native.models.instance_segmentation.heads.rtmdet_inst_head.RTMDetInstHead.prepare_loss_inputs`",
       "severity": "error"
     },
     {
-      "line": 59,
+      "line": 27,
       "column": 16,
-      "stop_line": 59,
-      "stop_column": 43,
-      "path": "tests/unit/backend/native/optimizers/test_callable.py",
+      "stop_line": 27,
+      "stop_column": 76,
+      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `momentum`",
-      "concise_description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `momentum`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 60,
-      "column": 16,
-      "stop_line": 60,
-      "stop_column": 47,
-      "path": "tests/unit/backend/native/optimizers/test_callable.py",
+      "line": 31,
+      "column": 22,
+      "stop_line": 36,
+      "stop_column": 14,
+      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `weight_decay`",
-      "concise_description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `weight_decay`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[480], Literal[480]], ori_shape=tuple[Literal[480], Literal[480]], ignored_labels=list[@_])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[480], Literal[480]], ori_shape=tuple[Literal[480], Literal[480]], ignored_labels=list[@_])",
       "severity": "error"
     },
     {
-      "line": 67,
+      "line": 45,
       "column": 16,
-      "stop_line": 67,
-      "stop_column": 37,
-      "path": "tests/unit/backend/native/schedulers/test_warmup_schedulers.py",
+      "stop_line": 45,
+      "stop_column": 76,
+      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `LRScheduler` has no attribute `monitor`",
-      "concise_description": "Object of class `LRScheduler` has no attribute `monitor`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `bboxes` with type `list[BoundingBoxes] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 80,
-      "column": 16,
-      "stop_line": 80,
-      "stop_column": 37,
-      "path": "tests/unit/backend/native/schedulers/test_warmup_schedulers.py",
+      "line": 49,
+      "column": 22,
+      "stop_line": 54,
+      "stop_column": 14,
+      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `LRScheduler` has no attribute `monitor`",
-      "concise_description": "Object of class `LRScheduler` has no attribute `monitor`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[480], Literal[480]], ori_shape=tuple[Literal[480], Literal[480]], ignored_labels=list[int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[480], Literal[480]], ori_shape=tuple[Literal[480], Literal[480]], ignored_labels=list[int])",
       "severity": "error"
     },
     {
-      "line": 246,
-      "column": 18,
-      "stop_line": 246,
-      "stop_column": 27,
-      "path": "tests/unit/backend/native/test_engine.py",
+      "line": 92,
+      "column": 24,
+      "stop_line": 95,
+      "stop_column": 14,
+      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Literal['MULTI_CLASS_CLS']` is not assignable to parameter `task` with type `OTXTaskType` in function `otx.backend.native.engine.OTXEngine.from_model_name`",
-      "concise_description": "Argument `Literal['MULTI_CLASS_CLS']` is not assignable to parameter `task` with type `OTXTaskType` in function `otx.backend.native.engine.OTXEngine.from_model_name`",
+      "description": "Argument `DeltaXYWHBBoxCoder` is not assignable to parameter `bbox_coder` with type `Module` in function `otx.backend.native.models.instance_segmentation.losses.roi_loss.ROICriterion.__init__`",
+      "concise_description": "Argument `DeltaXYWHBBoxCoder` is not assignable to parameter `bbox_coder` with type `Module` in function `otx.backend.native.models.instance_segmentation.losses.roi_loss.ROICriterion.__init__`",
       "severity": "error"
     },
     {
-      "line": 258,
-      "column": 22,
-      "stop_line": 258,
-      "stop_column": 31,
-      "path": "tests/unit/backend/native/test_engine.py",
+      "line": 108,
+      "column": 13,
+      "stop_line": 108,
+      "stop_column": 56,
+      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Literal['MULTI_CLASS_CLS']` is not assignable to parameter `task` with type `OTXTaskType` in function `otx.backend.native.engine.OTXEngine.from_model_name`",
-      "concise_description": "Argument `Literal['MULTI_CLASS_CLS']` is not assignable to parameter `task` with type `OTXTaskType` in function `otx.backend.native.engine.OTXEngine.from_model_name`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `prepare_loss_inputs`",
+      "concise_description": "Object of class `Tensor` has no attribute `prepare_loss_inputs`",
       "severity": "error"
     },
     {
-      "line": 26,
-      "column": 23,
-      "stop_line": 26,
-      "stop_column": 89,
-      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
+      "line": 132,
+      "column": 13,
+      "stop_line": 132,
+      "stop_column": 56,
+      "path": "tests/unit/backend/native/models/instance_segmentation/test_roi_head.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `prepare_loss_inputs`",
+      "concise_description": "Object of class `Tensor` has no attribute `prepare_loss_inputs`",
       "severity": "error"
     },
     {
-      "line": 194,
-      "column": 27,
-      "stop_line": 194,
-      "stop_column": 60,
-      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
+      "line": 18,
+      "column": 33,
+      "stop_line": 18,
+      "stop_column": 53,
+      "path": "tests/unit/backend/native/models/keypoint_detection/conftest.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)",
       "severity": "error"
     },
     {
-      "line": 196,
-      "column": 59,
-      "stop_line": 196,
+      "line": 19,
+      "column": 27,
+      "stop_line": 19,
       "stop_column": 82,
-      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
+      "path": "tests/unit/backend/native/models/keypoint_detection/conftest.py",
       "code": -2,
-      "name": "not-iterable",
-      "description": "Type `None` is not iterable",
-      "concise_description": "Type `None` is not iterable",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[192], Literal[256]], ori_shape=tuple[Literal[192], Literal[256]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[192], Literal[256]], ori_shape=tuple[Literal[192], Literal[256]])",
       "severity": "error"
     },
     {
-      "line": 215,
-      "column": 27,
-      "stop_line": 215,
-      "stop_column": 60,
-      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
+      "line": 20,
+      "column": 38,
+      "stop_line": 25,
+      "stop_column": 6,
+      "path": "tests/unit/backend/native/models/keypoint_detection/conftest.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (list[list[int]], format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[192], Literal[256]], dtype=dtype)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (list[list[int]], format=Literal[BoundingBoxFormat.XYXY], canvas_size=tuple[Literal[192], Literal[256]], dtype=dtype)",
       "severity": "error"
     },
     {
-      "line": 217,
-      "column": 92,
-      "stop_line": 217,
-      "stop_column": 115,
-      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
+      "line": 26,
+      "column": 31,
+      "stop_line": 26,
+      "stop_column": 86,
+      "path": "tests/unit/backend/native/models/keypoint_detection/heads/test_rtmcc_head.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[Tensor] | None` is not assignable to parameter `iterable` with type `Iterable[Tensor]` in function `enumerate.__new__`\n  Protocol `Iterable` requires attribute `__iter__`",
-      "concise_description": "Argument `list[Tensor] | None` is not assignable to parameter `iterable` with type `Iterable[Tensor]` in function `enumerate.__new__`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[192], Literal[256]], ori_shape=tuple[Literal[192], Literal[256]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[192], Literal[256]], ori_shape=tuple[Literal[192], Literal[256]])",
       "severity": "error"
     },
     {
-      "line": 236,
-      "column": 27,
-      "stop_line": 236,
-      "stop_column": 60,
-      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
+      "line": 31,
+      "column": 36,
+      "stop_line": 31,
+      "stop_column": 81,
+      "path": "tests/unit/backend/native/models/keypoint_detection/heads/test_rtmcc_head.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor)",
       "severity": "error"
     },
     {
-      "line": 244,
-      "column": 9,
-      "stop_line": 244,
-      "stop_column": 22,
-      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
+      "line": 110,
+      "column": 20,
+      "stop_line": 110,
+      "stop_column": 48,
+      "path": "tests/unit/backend/native/models/keypoint_detection/test_base.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[Tensor]` is not assignable to parameter `saliency_map` with type `list[ndarray[Unknown, Unknown]]` in function `otx.backend.native.models.utils.xai_utils.process_saliency_maps`",
-      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `saliency_map` with type `list[ndarray[Unknown, Unknown]]` in function `otx.backend.native.models.utils.xai_utils.process_saliency_maps`",
+      "description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
+      "concise_description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
       "severity": "error"
     },
     {
-      "line": 254,
-      "column": 49,
-      "stop_line": 254,
-      "stop_column": 50,
-      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
+      "line": 110,
+      "column": 57,
+      "stop_line": 110,
+      "stop_column": 82,
+      "path": "tests/unit/backend/native/models/keypoint_detection/test_base.py",
       "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `dict[str, Tensor | ndarray[Unknown, Unknown]]`\n  Argument `Literal[0]` is not assignable to parameter `key` with type `str` in function `dict.__getitem__`",
-      "concise_description": "Cannot index into `dict[str, Tensor | ndarray[Unknown, Unknown]]`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
+      "concise_description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
       "severity": "error"
     },
     {
-      "line": 255,
-      "column": 48,
-      "stop_line": 255,
-      "stop_column": 49,
-      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
+      "line": 49,
+      "column": 13,
+      "stop_line": 49,
+      "stop_column": 24,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `dict[str, Tensor | ndarray[Unknown, Unknown]]`\n  Argument `Literal[0]` is not assignable to parameter `key` with type `str` in function `dict.__getitem__`",
-      "concise_description": "Cannot index into `dict[str, Tensor | ndarray[Unknown, Unknown]]`",
+      "name": "not-callable",
+      "description": "Expected a callable, got `None`",
+      "concise_description": "Expected a callable, got `None`",
       "severity": "error"
     },
     {
-      "line": 256,
-      "column": 54,
-      "stop_line": 256,
-      "stop_column": 55,
-      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
+      "line": 50,
+      "column": 16,
+      "stop_line": 50,
+      "stop_column": 27,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-index",
-      "description": "Cannot index into `dict[str, Tensor | ndarray[Unknown, Unknown]]`\n  Argument `Literal[0]` is not assignable to parameter `key` with type `str` in function `dict.__getitem__`",
-      "concise_description": "Cannot index into `dict[str, Tensor | ndarray[Unknown, Unknown]]`",
+      "name": "not-callable",
+      "description": "Expected a callable, got `None`",
+      "concise_description": "Expected a callable, got `None`",
       "severity": "error"
     },
     {
-      "line": 32,
-      "column": 16,
-      "stop_line": 32,
-      "stop_column": 36,
-      "path": "tests/unit/backend/native/tools/explain/test_xai_algorithms.py",
+      "line": 196,
+      "column": 13,
+      "stop_line": 196,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `Tensor` is not assignable to declared return type `None`",
-      "concise_description": "Returned type `Tensor` is not assignable to declared return type `None`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `assert_not_called`",
+      "concise_description": "Object of class `Tensor` has no attribute `assert_not_called`",
       "severity": "error"
     },
     {
-      "line": 37,
+      "line": 213,
       "column": 9,
-      "stop_line": 37,
-      "stop_column": 28,
-      "path": "tests/unit/backend/native/tools/explain/test_xai_algorithms.py",
+      "stop_line": 213,
+      "stop_column": 33,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "code": -2,
+      "name": "missing-attribute",
+      "description": "Object of class `NoneType` has no attribute `is_init`",
+      "concise_description": "Object of class `NoneType` has no attribute `is_init`",
+      "severity": "error"
+    },
+    {
+      "line": 214,
+      "column": 29,
+      "stop_line": 214,
+      "stop_column": 33,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `(_: Unknown) -> None` is not assignable to parameter `head_forward_fn` with type `(FeatureMapType) -> Tensor` in function `otx.backend.native.tools.explain.explain_algo.ReciproCAM.__init__`",
-      "concise_description": "Argument `(_: Unknown) -> None` is not assignable to parameter `head_forward_fn` with type `(FeatureMapType) -> Tensor` in function `otx.backend.native.tools.explain.explain_algo.ReciproCAM.__init__`",
+      "description": "Argument `Literal[True]` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
+      "concise_description": "Argument `Literal[True]` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
       "severity": "error"
     },
     {
-      "line": 52,
-      "column": 16,
-      "stop_line": 52,
-      "stop_column": 37,
-      "path": "tests/unit/backend/native/tools/explain/test_xai_algorithms.py",
+      "line": 216,
+      "column": 9,
+      "stop_line": 216,
+      "stop_column": 38,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `Tensor` is not assignable to declared return type `None`",
-      "concise_description": "Returned type `Tensor` is not assignable to declared return type `None`",
+      "name": "missing-attribute",
+      "description": "Object of class `NoneType` has no attribute `init_weights`",
+      "concise_description": "Object of class `NoneType` has no attribute `init_weights`",
       "severity": "error"
     },
     {
-      "line": 56,
+      "line": 217,
       "column": 9,
-      "stop_line": 56,
-      "stop_column": 28,
-      "path": "tests/unit/backend/native/tools/explain/test_xai_algorithms.py",
+      "stop_line": 217,
+      "stop_column": 38,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `(_: Unknown) -> None` is not assignable to parameter `head_forward_fn` with type `(FeatureMapType) -> Tensor` in function `otx.backend.native.tools.explain.explain_algo.ViTReciproCAM.__init__`",
-      "concise_description": "Argument `(_: Unknown) -> None` is not assignable to parameter `head_forward_fn` with type `(FeatureMapType) -> Tensor` in function `otx.backend.native.tools.explain.explain_algo.ViTReciproCAM.__init__`",
+      "name": "missing-attribute",
+      "description": "Object of class `NoneType` has no attribute `init_weights`",
+      "concise_description": "Object of class `NoneType` has no attribute `init_weights`",
       "severity": "error"
     },
     {
-      "line": 100,
-      "column": 29,
-      "stop_line": 100,
-      "stop_column": 80,
-      "path": "tests/unit/backend/native/tools/explain/test_xai_algorithms.py",
+      "line": 218,
+      "column": 9,
+      "stop_line": 218,
+      "stop_column": 38,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[10], Literal[10]], ori_shape=tuple[Literal[10], Literal[10]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[10], Literal[10]], ori_shape=tuple[Literal[10], Literal[10]])",
+      "name": "missing-attribute",
+      "description": "Object of class `NoneType` has no attribute `init_weights`",
+      "concise_description": "Object of class `NoneType` has no attribute `init_weights`",
       "severity": "error"
     },
     {
-      "line": 103,
-      "column": 39,
-      "stop_line": 107,
-      "stop_column": 14,
-      "path": "tests/unit/backend/native/tools/explain/test_xai_algorithms.py",
+      "line": 219,
+      "column": 9,
+      "stop_line": 219,
+      "stop_column": 38,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xywh'], canvas_size=tuple[Literal[10], Literal[10]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xywh'], canvas_size=tuple[Literal[10], Literal[10]])",
+      "name": "missing-attribute",
+      "description": "Object of class `NoneType` has no attribute `init_weights`",
+      "concise_description": "Object of class `NoneType` has no attribute `init_weights`",
       "severity": "error"
     },
     {
-      "line": 23,
-      "column": 31,
-      "stop_line": 23,
-      "stop_column": 35,
-      "path": "tests/unit/backend/native/utils/test_api.py",
+      "line": 220,
+      "column": 9,
+      "stop_line": 220,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `str` is not assignable to parameter `task` with type `OTXTaskType | None` in function `otx.backend.native.cli.utils.list_models`",
-      "concise_description": "Argument `str` is not assignable to parameter `task` with type `OTXTaskType | None` in function `otx.backend.native.cli.utils.list_models`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `assert_not_called`",
+      "concise_description": "Object of class `Tensor` has no attribute `assert_not_called`",
       "severity": "error"
     },
     {
-      "line": 74,
-      "column": 16,
-      "stop_line": 74,
-      "stop_column": 23,
-      "path": "tests/unit/backend/native/utils/test_instantiators.py",
+      "line": 231,
+      "column": 28,
+      "stop_line": 231,
+      "stop_column": 54,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[partial[Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `list[partial[Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 75,
-      "column": 12,
-      "stop_line": 75,
-      "stop_column": 22,
-      "path": "tests/unit/backend/native/utils/test_instantiators.py",
+      "line": 231,
+      "column": 67,
+      "stop_line": 231,
+      "stop_column": 93,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 76,
-      "column": 12,
-      "stop_line": 76,
-      "stop_column": 22,
-      "path": "tests/unit/backend/native/utils/test_instantiators.py",
+      "line": 232,
+      "column": 28,
+      "stop_line": 232,
+      "stop_column": 52,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 77,
-      "column": 12,
-      "stop_line": 77,
-      "stop_column": 22,
-      "path": "tests/unit/backend/native/utils/test_instantiators.py",
+      "line": 232,
+      "column": 65,
+      "stop_line": 232,
+      "stop_column": 89,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 78,
-      "column": 12,
-      "stop_line": 78,
-      "stop_column": 22,
-      "path": "tests/unit/backend/native/utils/test_instantiators.py",
+      "line": 233,
+      "column": 28,
+      "stop_line": 233,
+      "stop_column": 54,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "unsupported-operation",
-      "description": "`None` is not subscriptable",
-      "concise_description": "`None` is not subscriptable",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 96,
-      "column": 12,
-      "stop_line": 96,
-      "stop_column": 31,
-      "path": "tests/unit/backend/native/utils/test_instantiators.py",
+      "line": 233,
+      "column": 67,
+      "stop_line": 233,
+      "stop_column": 93,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `Sampler` has no attribute `num_samples`",
-      "concise_description": "Object of class `Sampler` has no attribute `num_samples`",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 97,
-      "column": 12,
-      "stop_line": 97,
-      "stop_column": 31,
-      "path": "tests/unit/backend/native/utils/test_instantiators.py",
+      "line": 234,
+      "column": 28,
+      "stop_line": 234,
+      "stop_column": 52,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `Sampler` has no attribute `replacement`",
-      "concise_description": "Object of class `Sampler` has no attribute `replacement`",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 19,
-      "column": 29,
-      "stop_line": 19,
-      "stop_column": 57,
-      "path": "tests/unit/backend/native/utils/test_mask_utils.py",
+      "line": 234,
+      "column": 65,
+      "stop_line": 234,
+      "stop_column": 89,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C._VariableFunctions.randint` called with arguments: (low=Literal[0], high=Literal[2], size=tuple[Tensor, Tensor])\n  Possible overloads:\n  (low: int, high: int, size: _size, *, generator: Generator | None = None, dtype: dtype | None = None, device: device | int | str | None = None, requires_grad: bool = False, pin_memory: bool = False) -> Tensor [closest match]\n  (high: int, size: _size, *, generator: Generator | None = None, dtype: dtype | None = None, device: device | int | str | None = None, requires_grad: bool = False, pin_memory: bool = False) -> Tensor\n  (high: SymInt | int, size: Sequence[SymInt | int], *, generator: Generator | None, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (high: SymInt | int, size: Sequence[SymInt | int], *, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (low: SymInt | int, high: SymInt | int, size: Sequence[SymInt | int], *, generator: Generator | None, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (low: SymInt | int, high: SymInt | int, size: Sequence[SymInt | int], *, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor",
-      "concise_description": "No matching overload found for function `torch._C._VariableFunctions.randint` called with arguments: (low=Literal[0], high=Literal[2], size=tuple[Tensor, Tensor])",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 21,
-      "column": 43,
-      "stop_line": 21,
-      "stop_column": 74,
-      "path": "tests/unit/backend/native/utils/test_mask_utils.py",
+      "line": 245,
+      "column": 28,
+      "stop_line": 245,
+      "stop_column": 54,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `pycocotools.mask.frPyObjects` called with arguments: (dict[Unknown, Unknown], *Unknown)\n  Possible overloads:\n  (pyobj: list[list[int]] | list[_EncodedRLE] | ndarray[tuple[int, ...], dtype[unsignedinteger[_32Bit]]], h: int, w: int) -> list[_EncodedRLE] [closest match]\n  (pyobj: list[int] | _EncodedRLE, h: int, w: int) -> _EncodedRLE",
-      "concise_description": "No matching overload found for function `pycocotools.mask.frPyObjects` called with arguments: (dict[Unknown, Unknown], *Unknown)",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 17,
-      "column": 24,
-      "stop_line": 17,
-      "stop_column": 50,
-      "path": "tests/unit/backend/openvino/conftest.py",
+      "line": 245,
+      "column": 67,
+      "stop_line": 245,
+      "stop_column": 93,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `openvino._ov_api.Model.__init__` called with arguments: (Parameter, list[Parameter])\n  Possible overloads:\n  (other: Model) -> None\n  (results: Sequence[Result], sinks: Sequence[Node], parameters: Sequence[Parameter], name: str = '') -> None\n  (results: Sequence[Result], parameters: Sequence[Parameter], name: str = '') -> None\n  (results: Sequence[Node], parameters: Sequence[Parameter], name: str = '') -> None\n  (result: Node, parameters: Sequence[Parameter], name: str = '') -> None [closest match]\n  (results: Sequence[Output], parameters: Sequence[Parameter], name: str = '') -> None\n  (results: Sequence[Output], sinks: Sequence[Node], parameters: Sequence[Parameter], name: str = '') -> None\n  (results: Sequence[Output], sinks: Sequence[Output], parameters: Sequence[Parameter], name: str = '') -> None\n  (results: Sequence[Output], sinks: Sequence[Output], parameters: Sequence[Parameter], variables: Sequence[Variable], name: str = '') -> None\n  (results: Sequence[Result], sinks: Sequence[Output], parameters: Sequence[Parameter], name: str = '') -> None\n  (results: Sequence[Result], sinks: Sequence[Output], parameters: Sequence[Parameter], variables: Sequence[Variable], name: str = '') -> None\n  (results: Sequence[Result], sinks: Sequence[Node], parameters: Sequence[Parameter], variables: Sequence[Variable], name: str = '') -> None\n  (results: Sequence[Output], sinks: Sequence[Node], parameters: Sequence[Parameter], variables: Sequence[Variable], name: str = '') -> None\n  (results: Sequence[Result], parameters: Sequence[Parameter], variables: Sequence[Variable], name: str = '') -> None\n  (results: Sequence[Output], parameters: Sequence[Parameter], variables: Sequence[Variable], name: str = '') -> None",
-      "concise_description": "No matching overload found for function `openvino._ov_api.Model.__init__` called with arguments: (Parameter, list[Parameter])",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 119,
-      "column": 22,
-      "stop_line": 119,
-      "stop_column": 26,
-      "path": "tests/unit/cli/utils/test_jsonargparse.py",
+      "line": 246,
+      "column": 28,
+      "stop_line": 246,
+      "stop_column": 52,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `None` is not assignable to parameter `self` with type `ActionConfigFile` in function `otx.cli.utils.jsonargparse.apply_config`",
-      "concise_description": "Argument `None` is not assignable to parameter `self` with type `ActionConfigFile` in function `otx.cli.utils.jsonargparse.apply_config`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 380,
-      "column": 32,
-      "stop_line": 380,
-      "stop_column": 41,
-      "path": "tests/unit/cli/utils/test_jsonargparse.py",
+      "line": 246,
+      "column": 65,
+      "stop_line": 246,
+      "stop_column": 89,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `type[test_get_short_docstring.Component]` is not assignable to parameter `component` with type `TypeVar` in function `otx.cli.utils.jsonargparse.get_short_docstring`",
-      "concise_description": "Argument `type[test_get_short_docstring.Component]` is not assignable to parameter `component` with type `TypeVar` in function `otx.cli.utils.jsonargparse.get_short_docstring`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 381,
-      "column": 32,
-      "stop_line": 381,
-      "stop_column": 48,
-      "path": "tests/unit/cli/utils/test_jsonargparse.py",
+      "line": 247,
+      "column": 28,
+      "stop_line": 247,
+      "stop_column": 54,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `(self: test_get_short_docstring.Component) -> None` is not assignable to parameter `component` with type `TypeVar` in function `otx.cli.utils.jsonargparse.get_short_docstring`",
-      "concise_description": "Argument `(self: test_get_short_docstring.Component) -> None` is not assignable to parameter `component` with type `TypeVar` in function `otx.cli.utils.jsonargparse.get_short_docstring`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 382,
-      "column": 32,
-      "stop_line": 382,
-      "stop_column": 48,
-      "path": "tests/unit/cli/utils/test_jsonargparse.py",
+      "line": 247,
+      "column": 67,
+      "stop_line": 247,
+      "stop_column": 93,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `type[test_get_short_docstring.WithoutDocstring]` is not assignable to parameter `component` with type `TypeVar` in function `otx.cli.utils.jsonargparse.get_short_docstring`",
-      "concise_description": "Argument `type[test_get_short_docstring.WithoutDocstring]` is not assignable to parameter `component` with type `TypeVar` in function `otx.cli.utils.jsonargparse.get_short_docstring`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 48,
-      "column": 5,
-      "stop_line": 48,
-      "stop_column": 15,
-      "path": "tests/unit/data/conftest.py",
+      "line": 248,
+      "column": 28,
+      "stop_line": 248,
+      "stop_column": 52,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `ImageFromBytes` has no attribute `path`",
-      "concise_description": "Object of class `ImageFromBytes` has no attribute `path`",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 57,
-      "column": 17,
-      "stop_line": 57,
-      "stop_column": 60,
-      "path": "tests/unit/data/conftest.py",
+      "line": 248,
+      "column": 65,
+      "stop_line": 248,
+      "stop_column": 89,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "missing-argument",
-      "description": "Missing argument `_image` in function `datumaro.components.annotation.Mask.__init__`",
-      "concise_description": "Missing argument `_image` in function `datumaro.components.annotation.Mask.__init__`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 57,
-      "column": 27,
-      "stop_line": 57,
-      "stop_column": 32,
-      "path": "tests/unit/data/conftest.py",
+      "line": 260,
+      "column": 13,
+      "stop_line": 260,
+      "stop_column": 51,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `image` in function `datumaro.components.annotation.Mask.__init__`",
-      "concise_description": "Unexpected keyword argument `image` in function `datumaro.components.annotation.Mask.__init__`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 72,
-      "column": 5,
-      "stop_line": 72,
-      "stop_column": 15,
-      "path": "tests/unit/data/conftest.py",
+      "line": 261,
+      "column": 24,
+      "stop_line": 261,
+      "stop_column": 62,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `ImageFromBytes` has no attribute `path`",
-      "concise_description": "Object of class `ImageFromBytes` has no attribute `path`",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 93,
-      "column": 5,
-      "stop_line": 93,
-      "stop_column": 15,
-      "path": "tests/unit/data/conftest.py",
+      "line": 264,
+      "column": 13,
+      "stop_line": 264,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `ImageFromBytes` has no attribute `path`",
-      "concise_description": "Object of class `ImageFromBytes` has no attribute `path`",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 116,
-      "column": 5,
-      "stop_line": 116,
-      "stop_column": 15,
-      "path": "tests/unit/data/conftest.py",
+      "line": 265,
+      "column": 24,
+      "stop_line": 265,
+      "stop_column": 60,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `ImageFromBytes` has no attribute `path`",
-      "concise_description": "Object of class `ImageFromBytes` has no attribute `path`",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 123,
-      "column": 17,
-      "stop_line": 123,
-      "stop_column": 60,
-      "path": "tests/unit/data/conftest.py",
+      "line": 268,
+      "column": 13,
+      "stop_line": 268,
+      "stop_column": 51,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "missing-argument",
-      "description": "Missing argument `_image` in function `datumaro.components.annotation.Mask.__init__`",
-      "concise_description": "Missing argument `_image` in function `datumaro.components.annotation.Mask.__init__`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 123,
-      "column": 27,
-      "stop_line": 123,
-      "stop_column": 32,
-      "path": "tests/unit/data/conftest.py",
+      "line": 269,
+      "column": 24,
+      "stop_line": 269,
+      "stop_column": 62,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `image` in function `datumaro.components.annotation.Mask.__init__`",
-      "concise_description": "Unexpected keyword argument `image` in function `datumaro.components.annotation.Mask.__init__`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 226,
-      "column": 20,
-      "stop_line": 226,
-      "stop_column": 104,
-      "path": "tests/unit/data/conftest.py",
+      "line": 272,
+      "column": 13,
+      "stop_line": 272,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
-      "concise_description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 247,
-      "column": 12,
-      "stop_line": 294,
-      "stop_column": 26,
-      "path": "tests/unit/data/conftest.py",
+      "line": 273,
+      "column": 24,
+      "stop_line": 273,
+      "stop_column": 60,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `DatasetSubset` is not assignable to declared return type `Dataset`",
-      "concise_description": "Returned type `DatasetSubset` is not assignable to declared return type `Dataset`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 250,
-      "column": 20,
-      "stop_line": 250,
-      "stop_column": 21,
-      "path": "tests/unit/data/conftest.py",
+      "line": 286,
+      "column": 13,
+      "stop_line": 286,
+      "stop_column": 51,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Literal[0]` is not assignable to parameter `id` with type `str` in function `datumaro.components.dataset_base.DatasetItem.__init__`",
-      "concise_description": "Argument `Literal[0]` is not assignable to parameter `id` with type `str` in function `datumaro.components.dataset_base.DatasetItem.__init__`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 262,
-      "column": 20,
-      "stop_line": 262,
-      "stop_column": 21,
-      "path": "tests/unit/data/conftest.py",
+      "line": 287,
+      "column": 24,
+      "stop_line": 287,
+      "stop_column": 62,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Literal[1]` is not assignable to parameter `id` with type `str` in function `datumaro.components.dataset_base.DatasetItem.__init__`",
-      "concise_description": "Argument `Literal[1]` is not assignable to parameter `id` with type `str` in function `datumaro.components.dataset_base.DatasetItem.__init__`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 188,
-      "column": 30,
-      "stop_line": 188,
-      "stop_column": 42,
-      "path": "tests/unit/data/dataset/test_base.py",
+      "line": 290,
+      "column": 13,
+      "stop_line": 290,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`Literal['not_a_list']` is not assignable to attribute `transforms` with type `((...) -> Unknown) | Compose | dict[str, ((...) -> Unknown) | Compose | list[(...) -> Unknown]] | list[(...) -> Unknown] | None`",
-      "concise_description": "`Literal['not_a_list']` is not assignable to attribute `transforms` with type `((...) -> Unknown) | Compose | dict[str, ((...) -> Unknown) | Compose | list[(...) -> Unknown]] | list[(...) -> Unknown] | None`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 231,
-      "column": 20,
-      "stop_line": 231,
-      "stop_column": 56,
-      "path": "tests/unit/data/dataset/test_base.py",
+      "line": 291,
+      "column": 24,
+      "stop_line": 291,
+      "stop_column": 60,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `FunctionType` has no attribute `call_count`",
-      "concise_description": "Object of class `FunctionType` has no attribute `call_count`",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 126,
-      "column": 9,
-      "stop_line": 126,
-      "stop_column": 52,
-      "path": "tests/unit/data/dataset/test_mixins.py",
+      "line": 294,
+      "column": 13,
+      "stop_line": 294,
+      "stop_column": 51,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `DatasetWithoutSwitch` has no attribute `_ensure_data_aug_switch_initialized`",
-      "concise_description": "Object of class `DatasetWithoutSwitch` has no attribute `_ensure_data_aug_switch_initialized`",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 129,
-      "column": 9,
-      "stop_line": 129,
-      "stop_column": 41,
-      "path": "tests/unit/data/dataset/test_mixins.py",
+      "line": 295,
+      "column": 24,
+      "stop_line": 295,
+      "stop_column": 62,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `DatasetWithoutSwitch` has no attribute `has_dynamic_augmentation`",
-      "concise_description": "Object of class `DatasetWithoutSwitch` has no attribute `has_dynamic_augmentation`",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 131,
-      "column": 20,
-      "stop_line": 131,
-      "stop_column": 52,
-      "path": "tests/unit/data/dataset/test_mixins.py",
+      "line": 298,
+      "column": 13,
+      "stop_line": 298,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `DatasetWithoutSwitch` has no attribute `has_dynamic_augmentation`",
-      "concise_description": "Object of class `DatasetWithoutSwitch` has no attribute `has_dynamic_augmentation`",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 142,
-      "column": 9,
-      "stop_line": 142,
-      "stop_column": 53,
-      "path": "tests/unit/data/dataset/test_mixins.py",
+      "line": 299,
+      "column": 24,
+      "stop_line": 299,
+      "stop_column": 60,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `DatasetWithNoneSwitch` has no attribute `_ensure_data_aug_switch_initialized`",
-      "concise_description": "Object of class `DatasetWithNoneSwitch` has no attribute `_ensure_data_aug_switch_initialized`",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 145,
-      "column": 9,
-      "stop_line": 145,
-      "stop_column": 42,
-      "path": "tests/unit/data/dataset/test_mixins.py",
+      "line": 311,
+      "column": 28,
+      "stop_line": 311,
+      "stop_column": 54,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `DatasetWithNoneSwitch` has no attribute `has_dynamic_augmentation`",
-      "concise_description": "Object of class `DatasetWithNoneSwitch` has no attribute `has_dynamic_augmentation`",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 147,
-      "column": 20,
-      "stop_line": 147,
-      "stop_column": 53,
-      "path": "tests/unit/data/dataset/test_mixins.py",
+      "line": 311,
+      "column": 67,
+      "stop_line": 311,
+      "stop_column": 93,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `DatasetWithNoneSwitch` has no attribute `has_dynamic_augmentation`",
-      "concise_description": "Object of class `DatasetWithNoneSwitch` has no attribute `has_dynamic_augmentation`",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 53,
-      "column": 53,
-      "stop_line": 53,
-      "stop_column": 56,
-      "path": "tests/unit/data/dataset/test_tile.py",
+      "line": 312,
+      "column": 28,
+      "stop_line": 312,
+      "stop_column": 52,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
-      "concise_description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 64,
-      "column": 53,
-      "stop_line": 64,
-      "stop_column": 56,
-      "path": "tests/unit/data/dataset/test_tile.py",
+      "line": 312,
+      "column": 65,
+      "stop_line": 312,
+      "stop_column": 89,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
-      "concise_description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 71,
-      "column": 53,
-      "stop_line": 71,
-      "stop_column": 56,
-      "path": "tests/unit/data/dataset/test_tile.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
-      "concise_description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
+      "line": 313,
+      "column": 28,
+      "stop_line": 313,
+      "stop_column": 54,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
+      "code": -2,
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 78,
-      "column": 53,
-      "stop_line": 78,
-      "stop_column": 56,
-      "path": "tests/unit/data/dataset/test_tile.py",
+      "line": 313,
+      "column": 67,
+      "stop_line": 313,
+      "stop_column": 93,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
-      "concise_description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 19,
-      "column": 31,
-      "stop_line": 19,
-      "stop_column": 76,
-      "path": "tests/unit/data/entity/conftest.py",
+      "line": 314,
+      "column": 28,
+      "stop_line": 314,
+      "stop_column": 52,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, dtype=dtype)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, dtype=dtype)",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 91,
-      "column": 66,
-      "stop_line": 91,
+      "line": 314,
+      "column": 65,
+      "stop_line": 314,
       "stop_column": 89,
-      "path": "tests/unit/data/entity/test_base.py",
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `Tensor` has no attribute `img_shape`",
-      "concise_description": "Object of class `Tensor` has no attribute `img_shape`",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 23,
-      "column": 40,
-      "stop_line": 23,
-      "stop_column": 95,
-      "path": "tests/unit/data/entity/test_torch.py",
+      "line": 325,
+      "column": 28,
+      "stop_line": 325,
+      "stop_column": 54,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 24,
-      "column": 53,
-      "stop_line": 28,
-      "stop_column": 14,
-      "path": "tests/unit/data/entity/test_torch.py",
+      "line": 325,
+      "column": 67,
+      "stop_line": 325,
+      "stop_column": 93,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xywh'], canvas_size=tuple[Literal[224], Literal[224]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xywh'], canvas_size=tuple[Literal[224], Literal[224]])",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 35,
-      "column": 20,
-      "stop_line": 35,
-      "stop_column": 40,
-      "path": "tests/unit/data/entity/test_torch.py",
+      "line": 326,
+      "column": 28,
+      "stop_line": 326,
+      "stop_column": 52,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Sequence[ImageInfo | None] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `Sequence[ImageInfo | None] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 21,
-      "column": 22,
-      "stop_line": 21,
-      "stop_column": 69,
-      "path": "tests/unit/data/entity/test_utils.py",
+      "line": 326,
+      "column": 65,
+      "stop_line": 326,
+      "stop_column": 89,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[3], Literal[2]], img_idx=Literal[0], ori_shape=tuple[Literal[2], Literal[2]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[3], Literal[2]], img_idx=Literal[0], ori_shape=tuple[Literal[2], Literal[2]])",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 22,
-      "column": 22,
-      "stop_line": 22,
-      "stop_column": 69,
-      "path": "tests/unit/data/entity/test_utils.py",
+      "line": 327,
+      "column": 28,
+      "stop_line": 327,
+      "stop_column": 54,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[2], Literal[3]], img_idx=Literal[1], ori_shape=tuple[Literal[2], Literal[3]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[2], Literal[3]], img_idx=Literal[1], ori_shape=tuple[Literal[2], Literal[3]])",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 23,
-      "column": 22,
-      "stop_line": 23,
-      "stop_column": 69,
-      "path": "tests/unit/data/entity/test_utils.py",
+      "line": 327,
+      "column": 67,
+      "stop_line": 327,
+      "stop_column": 93,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[2], Literal[4]], img_idx=Literal[2], ori_shape=tuple[Literal[2], Literal[4]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[2], Literal[4]], img_idx=Literal[2], ori_shape=tuple[Literal[2], Literal[4]])",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `weight`",
+      "concise_description": "Object of class `Tensor` has no attribute `weight`",
       "severity": "error"
     },
     {
-      "line": 24,
-      "column": 22,
-      "stop_line": 24,
-      "stop_column": 91,
-      "path": "tests/unit/data/entity/test_utils.py",
+      "line": 328,
+      "column": 28,
+      "stop_line": 328,
+      "stop_column": 52,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[2], Literal[3]], img_idx=Literal[3], ori_shape=tuple[Literal[1], Literal[2]], padding=tuple[Literal[0], Literal[0], Literal[1], Literal[1]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[2], Literal[3]], img_idx=Literal[3], ori_shape=tuple[Literal[1], Literal[2]], padding=tuple[Literal[0], Literal[0], Literal[1], Literal[1]])",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 29,
-      "column": 46,
-      "stop_line": 29,
-      "stop_column": 59,
-      "path": "tests/unit/data/entity/test_utils.py",
+      "line": 328,
+      "column": 65,
+      "stop_line": 328,
+      "stop_column": 89,
+      "path": "tests/unit/backend/native/models/modules/test_base_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `tensor_list` with type `list[Tensor]` in function `otx.data.entity.utils.stack_batch`",
-      "concise_description": "Argument `Tensor | list[Tensor]` is not assignable to parameter `tensor_list` with type `list[Tensor]` in function `otx.data.entity.utils.stack_batch`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `bias`",
+      "concise_description": "Object of class `Tensor` has no attribute `bias`",
       "severity": "error"
     },
     {
-      "line": 29,
-      "column": 61,
-      "stop_line": 29,
-      "stop_column": 77,
-      "path": "tests/unit/data/entity/test_utils.py",
+      "line": 123,
+      "column": 12,
+      "stop_line": 123,
+      "stop_column": 34,
+      "path": "tests/unit/backend/native/models/modules/test_conv_module.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Sequence[ImageInfo | None] | None` is not assignable to parameter `img_info_list` with type `list[ImageInfo]` in function `otx.data.entity.utils.stack_batch`",
-      "concise_description": "Argument `Sequence[ImageInfo | None] | None` is not assignable to parameter `img_info_list` with type `list[ImageInfo]` in function `otx.data.entity.utils.stack_batch`",
+      "name": "missing-attribute",
+      "description": "Object of class `str` has no attribute `args`",
+      "concise_description": "Object of class `str` has no attribute `args`",
       "severity": "error"
     },
     {
-      "line": 46,
+      "line": 129,
+      "column": 12,
+      "stop_line": 129,
+      "stop_column": 34,
+      "path": "tests/unit/backend/native/models/modules/test_conv_module.py",
+      "code": -2,
+      "name": "missing-attribute",
+      "description": "Object of class `str` has no attribute `args`",
+      "concise_description": "Object of class `str` has no attribute `args`",
+      "severity": "error"
+    },
+    {
+      "line": 24,
       "column": 66,
-      "stop_line": 46,
-      "stop_column": 96,
-      "path": "tests/unit/data/samplers/test_balanced_sampler.py",
+      "stop_line": 24,
+      "stop_column": 74,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `DatasetSubset` is not assignable to parameter `legacy_dataset` with type `Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
-      "concise_description": "Argument `DatasetSubset` is not assignable to parameter `legacy_dataset` with type `Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 59,
-      "column": 39,
-      "stop_line": 59,
-      "stop_column": 69,
-      "path": "tests/unit/data/samplers/test_class_incremental_sampler.py",
+      "line": 24,
+      "column": 66,
+      "stop_line": 24,
+      "stop_column": 74,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `DatasetSubset` is not assignable to parameter `legacy_dataset` with type `Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
-      "concise_description": "Argument `DatasetSubset` is not assignable to parameter `legacy_dataset` with type `Dataset` in function `datumaro.experimental.legacy.dataset_converters.convert_from_legacy`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 190,
-      "column": 16,
-      "stop_line": 190,
-      "stop_column": 19,
-      "path": "tests/unit/data/test_module.py",
+      "line": 24,
+      "column": 66,
+      "stop_line": 24,
+      "stop_column": 74,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `DictConfig | ListConfig` is not assignable to declared return type `DictConfig`",
-      "concise_description": "Returned type `DictConfig | ListConfig` is not assignable to declared return type `DictConfig`",
+      "name": "bad-argument-type",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 228,
-      "column": 35,
-      "stop_line": 228,
-      "stop_column": 43,
-      "path": "tests/unit/data/test_module.py",
+      "line": 41,
+      "column": 75,
+      "stop_line": 41,
+      "stop_column": 83,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
-      "name": "not-a-type",
-      "description": "Expected a type form, got instance of `(obj: object, /) -> TypeIs[(...) -> object]`",
-      "concise_description": "Expected a type form, got instance of `(obj: object, /) -> TypeIs[(...) -> object]`",
+      "name": "bad-argument-type",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 88,
-      "column": 16,
-      "stop_line": 88,
-      "stop_column": 67,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 41,
+      "column": 75,
+      "stop_line": 41,
+      "stop_column": 83,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Categories` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `Categories` is not assignable to parameter `obj` with type `Sized` in function `len`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 183,
-      "column": 77,
-      "stop_line": 183,
+      "line": 41,
+      "column": 75,
+      "stop_line": 41,
       "stop_column": 83,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int] | list[str]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int] | list[str]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 189,
-      "column": 88,
-      "stop_line": 189,
-      "stop_column": 91,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 59,
+      "column": 17,
+      "stop_line": 59,
+      "stop_column": 33,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Argument `(normalization: ((...) -> Module) | Module | tuple[str, Module] | None, num_features: int, postfix: int | str = '', layer_name: str | None = None, requires_grad: bool = True, eps: float = ..., **kwargs: Unknown) -> tuple[str, Module]` is not assignable to parameter `func` with type `(...) -> Module` in function `functools.partial.__new__`",
+      "concise_description": "Argument `(normalization: ((...) -> Module) | Module | tuple[str, Module] | None, num_features: int, postfix: int | str = '', layer_name: str | None = None, requires_grad: bool = True, eps: float = ..., **kwargs: Unknown) -> tuple[str, Module]` is not assignable to parameter `func` with type `(...) -> Module` in function `functools.partial.__new__`",
       "severity": "error"
     },
     {
-      "line": 195,
-      "column": 88,
-      "stop_line": 195,
-      "stop_column": 91,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 61,
+      "column": 9,
+      "stop_line": 61,
+      "stop_column": 17,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 201,
-      "column": 88,
-      "stop_line": 201,
-      "stop_column": 91,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 61,
+      "column": 9,
+      "stop_line": 61,
+      "stop_column": 17,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 210,
-      "column": 89,
-      "stop_line": 210,
-      "stop_column": 92,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 61,
+      "column": 9,
+      "stop_line": 61,
+      "stop_column": 17,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 211,
-      "column": 89,
-      "stop_line": 211,
-      "stop_column": 92,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 80,
+      "column": 57,
+      "stop_line": 80,
+      "stop_column": 65,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 212,
-      "column": 87,
-      "stop_line": 212,
-      "stop_column": 90,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 80,
+      "column": 57,
+      "stop_line": 80,
+      "stop_column": 65,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 220,
-      "column": 98,
-      "stop_line": 220,
-      "stop_column": 101,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 80,
+      "column": 57,
+      "stop_line": 80,
+      "stop_column": 65,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 221,
-      "column": 101,
-      "stop_line": 221,
-      "stop_column": 104,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 82,
+      "column": 9,
+      "stop_line": 82,
+      "stop_column": 17,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `layer_name` with type `str | None` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 222,
-      "column": 101,
-      "stop_line": 222,
-      "stop_column": 104,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 82,
+      "column": 9,
+      "stop_line": 82,
+      "stop_column": 17,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `requires_grad` with type `bool` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 230,
-      "column": 106,
-      "stop_line": 230,
-      "stop_column": 109,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 82,
+      "column": 9,
+      "stop_line": 82,
+      "stop_column": 17,
+      "path": "tests/unit/backend/native/models/modules/test_norm.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
+      "concise_description": "Unpacked keyword argument `int | str` is not assignable to parameter `eps` with type `float` in function `otx.backend.native.models.modules.norm.build_norm_layer`",
       "severity": "error"
     },
     {
-      "line": 232,
-      "column": 106,
-      "stop_line": 232,
-      "stop_column": 109,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 24,
+      "column": 29,
+      "stop_line": 24,
+      "stop_column": 33,
+      "path": "tests/unit/backend/native/models/modules/test_padding.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Argument `None` is not assignable to parameter `cfg` with type `dict[Unknown, Unknown]` in function `otx.backend.native.models.modules.padding.build_padding_layer`",
+      "concise_description": "Argument `None` is not assignable to parameter `cfg` with type `dict[Unknown, Unknown]` in function `otx.backend.native.models.modules.padding.build_padding_layer`",
       "severity": "error"
     },
     {
-      "line": 234,
-      "column": 99,
-      "stop_line": 234,
-      "stop_column": 102,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 73,
+      "column": 92,
+      "stop_line": 73,
+      "stop_column": 93,
+      "path": "tests/unit/backend/native/models/modules/test_transformer.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Argument `Literal[1]` is not assignable to parameter `padding` with type `str` in function `otx.backend.native.models.modules.transformer.AdaptivePadding.__init__`",
+      "concise_description": "Argument `Literal[1]` is not assignable to parameter `padding` with type `str` in function `otx.backend.native.models.modules.transformer.AdaptivePadding.__init__`",
       "severity": "error"
     },
     {
-      "line": 245,
-      "column": 98,
-      "stop_line": 245,
-      "stop_column": 101,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 27,
+      "column": 40,
+      "stop_line": 27,
+      "stop_column": 87,
+      "path": "tests/unit/backend/native/models/segmentation/heads/test_class_incremental.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Literal[0], tuple[Literal[128], Literal[128]], tuple[Literal[128], Literal[128]], ignored_labels=list[int])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Literal[0], tuple[Literal[128], Literal[128]], tuple[Literal[128], Literal[128]], ignored_labels=list[int])",
       "severity": "error"
     },
     {
-      "line": 246,
-      "column": 100,
-      "stop_line": 246,
-      "stop_column": 103,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 16,
+      "column": 35,
+      "stop_line": 16,
+      "stop_column": 36,
+      "path": "tests/unit/backend/native/models/segmentation/segmentors/test_base_model.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Argument `Literal[3]` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
+      "concise_description": "Argument `Literal[3]` is not assignable to parameter `value` with type `Module | Tensor` in function `torch.nn.modules.module.Module.__setattr__`",
       "severity": "error"
     },
     {
-      "line": 247,
-      "column": 101,
-      "stop_line": 247,
-      "stop_column": 104,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 33,
+      "column": 31,
+      "stop_line": 33,
+      "stop_column": 86,
+      "path": "tests/unit/backend/native/models/segmentation/segmentors/test_base_model.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[256], Literal[256]], img_idx=Literal[0], ori_shape=tuple[Literal[256], Literal[256]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[256], Literal[256]], img_idx=Literal[0], ori_shape=tuple[Literal[256], Literal[256]])",
       "severity": "error"
     },
     {
-      "line": 248,
-      "column": 103,
-      "stop_line": 248,
-      "stop_column": 106,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 55,
+      "column": 31,
+      "stop_line": 55,
+      "stop_column": 86,
+      "path": "tests/unit/backend/native/models/segmentation/segmentors/test_base_model.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[256], Literal[256]], img_idx=Literal[0], ori_shape=tuple[Literal[256], Literal[256]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[256], Literal[256]], img_idx=Literal[0], ori_shape=tuple[Literal[256], Literal[256]])",
       "severity": "error"
     },
     {
-      "line": 249,
-      "column": 93,
-      "stop_line": 249,
-      "stop_column": 96,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 62,
+      "column": 31,
+      "stop_line": 62,
+      "stop_column": 109,
+      "path": "tests/unit/backend/native/models/segmentation/segmentors/test_base_model.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[256], Literal[256]], img_idx=Literal[0], ignored_labels=list[int], ori_shape=tuple[Literal[256], Literal[256]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[256], Literal[256]], img_idx=Literal[0], ignored_labels=list[int], ori_shape=tuple[Literal[256], Literal[256]])",
       "severity": "error"
     },
     {
-      "line": 252,
-      "column": 104,
-      "stop_line": 252,
-      "stop_column": 107,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 37,
+      "column": 19,
+      "stop_line": 37,
+      "stop_column": 71,
+      "path": "tests/unit/backend/native/models/segmentation/test_base.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `masks` with type `list[Mask] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `masks` with type `list[Mask] | None` in function `otx.data.entity.sample.OTXSampleBatch.__init__`",
       "severity": "error"
     },
     {
-      "line": 253,
-      "column": 106,
-      "stop_line": 253,
-      "stop_column": 109,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 116,
+      "column": 20,
+      "stop_line": 116,
+      "stop_column": 45,
+      "path": "tests/unit/backend/native/models/segmentation/test_base.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
+      "concise_description": "Argument `list[Tensor] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
       "severity": "error"
     },
     {
-      "line": 254,
-      "column": 107,
-      "stop_line": 254,
-      "stop_column": 110,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 58,
+      "column": 21,
+      "stop_line": 58,
+      "stop_column": 22,
+      "path": "tests/unit/backend/native/models/segmentation/test_dino_v2_seg.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
+      "concise_description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
       "severity": "error"
     },
     {
-      "line": 256,
-      "column": 106,
-      "stop_line": 256,
-      "stop_column": 109,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 66,
+      "column": 21,
+      "stop_line": 66,
+      "stop_column": 22,
+      "path": "tests/unit/backend/native/models/segmentation/test_segnext.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
+      "concise_description": "Argument `Tensor` is not assignable to parameter with type `(ParamSpec(_InputT)) -> _RetT`",
       "severity": "error"
     },
     {
-      "line": 258,
-      "column": 99,
-      "stop_line": 258,
-      "stop_column": 102,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 58,
+      "column": 41,
+      "stop_line": 58,
+      "stop_column": 46,
+      "path": "tests/unit/backend/native/models/test_base.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Argument `dict[str, Tensor]` is not assignable to parameter `batch` with type `OTXSampleBatch` in function `otx.backend.native.models.base.OTXModel.training_step`",
+      "concise_description": "Argument `dict[str, Tensor]` is not assignable to parameter `batch` with type `OTXSampleBatch` in function `otx.backend.native.models.base.OTXModel.training_step`",
       "severity": "error"
     },
     {
-      "line": 264,
-      "column": 92,
-      "stop_line": 264,
-      "stop_column": 95,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 144,
+      "column": 47,
+      "stop_line": 144,
+      "stop_column": 50,
+      "path": "tests/unit/backend/native/models/test_base.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "name": "bad-typed-dict-key",
+      "description": "`Literal[255]` is not assignable to TypedDict key `ignore_index` with type `list[list[str]] | list[str]`",
+      "concise_description": "`Literal[255]` is not assignable to TypedDict key `ignore_index` with type `list[list[str]] | list[str]`",
       "severity": "error"
     },
     {
-      "line": 268,
-      "column": 92,
-      "stop_line": 268,
-      "stop_column": 95,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 191,
+      "column": 17,
+      "stop_line": 191,
+      "stop_column": 39,
+      "path": "tests/unit/backend/native/models/test_base.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Argument `dict[str, Any]` is not assignable to parameter `label_info` with type `HLabelInfo` in function `otx.backend.native.models.classification.hlabel_models.base.OTXHlabelClsModel.__init__`",
+      "concise_description": "Argument `dict[str, Any]` is not assignable to parameter `label_info` with type `HLabelInfo` in function `otx.backend.native.models.classification.hlabel_models.base.OTXHlabelClsModel.__init__`",
       "severity": "error"
     },
     {
-      "line": 272,
-      "column": 109,
-      "stop_line": 272,
-      "stop_column": 112,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 231,
+      "column": 73,
+      "stop_line": 231,
+      "stop_column": 77,
+      "path": "tests/unit/backend/native/models/test_base.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Argument `None` is not assignable to parameter `metric` with type `Tensor` in function `otx.backend.native.models.base.OTXModel.lr_scheduler_step`",
+      "concise_description": "Argument `None` is not assignable to parameter `metric` with type `Tensor` in function `otx.backend.native.models.base.OTXModel.lr_scheduler_step`",
       "severity": "error"
     },
     {
-      "line": 277,
-      "column": 109,
-      "stop_line": 277,
-      "stop_column": 112,
-      "path": "tests/unit/data/test_pre_filtering.py",
+      "line": 242,
+      "column": 73,
+      "stop_line": 242,
+      "stop_column": 77,
+      "path": "tests/unit/backend/native/models/test_base.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
-      "concise_description": "Argument `list[int]` is not assignable to parameter `labels` with type `list[str]` in function `otx.data.utils.pre_filtering.is_valid_anno_for_task`",
+      "description": "Argument `None` is not assignable to parameter `metric` with type `Tensor` in function `otx.backend.native.models.base.OTXModel.lr_scheduler_step`",
+      "concise_description": "Argument `None` is not assignable to parameter `metric` with type `Tensor` in function `otx.backend.native.models.base.OTXModel.lr_scheduler_step`",
       "severity": "error"
     },
     {
-      "line": 25,
-      "column": 20,
-      "stop_line": 32,
-      "stop_column": 14,
-      "path": "tests/unit/data/test_transform_libs.py",
+      "line": 21,
+      "column": 42,
+      "stop_line": 21,
+      "stop_column": 46,
+      "path": "tests/unit/backend/native/models/utils/test_segmentation.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `Compose` is not assignable to declared return type `list[dict[str, Any]]`",
-      "concise_description": "Returned type `Compose` is not assignable to declared return type `list[dict[str, Any]]`",
+      "name": "bad-argument-type",
+      "description": "Argument `None` is not assignable to parameter `ctx` with type `(...) -> Unknown` in function `otx.backend.native.models.segmentation.modules.blocks.OnnxLpNormalization.forward`",
+      "concise_description": "Argument `None` is not assignable to parameter `ctx` with type `(...) -> Unknown` in function `otx.backend.native.models.segmentation.modules.blocks.OnnxLpNormalization.forward`",
       "severity": "error"
     },
     {
-      "line": 53,
-      "column": 20,
-      "stop_line": 56,
-      "stop_column": 14,
-      "path": "tests/unit/data/test_transform_libs.py",
+      "line": 39,
+      "column": 47,
+      "stop_line": 39,
+      "stop_column": 64,
+      "path": "tests/unit/backend/native/models/utils/test_segmentation.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `SubsetConfig` is not assignable to declared return type `list[dict[str, Any]]`",
-      "concise_description": "Returned type `SubsetConfig` is not assignable to declared return type `list[dict[str, Any]]`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `Tensor` in function `otx.backend.native.models.segmentation.modules.aggregators.IterativeAggregator.forward`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `x` with type `Tensor` in function `otx.backend.native.models.segmentation.modules.aggregators.IterativeAggregator.forward`",
       "severity": "error"
     },
     {
-      "line": 57,
+      "line": 42,
       "column": 16,
-      "stop_line": 57,
-      "stop_column": 23,
-      "path": "tests/unit/data/test_transform_libs.py",
+      "stop_line": 42,
+      "stop_column": 37,
+      "path": "tests/unit/backend/native/optimizers/test_callable.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `DictConfig | ListConfig` is not assignable to declared return type `list[dict[str, Any]]`",
-      "concise_description": "Returned type `DictConfig | ListConfig` is not assignable to declared return type `list[dict[str, Any]]`",
+      "name": "missing-attribute",
+      "description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `lr`",
+      "concise_description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `lr`",
       "severity": "error"
     },
     {
-      "line": 82,
-      "column": 54,
-      "stop_line": 82,
-      "stop_column": 57,
-      "path": "tests/unit/data/test_transform_libs.py",
+      "line": 43,
+      "column": 16,
+      "stop_line": 43,
+      "stop_column": 43,
+      "path": "tests/unit/backend/native/optimizers/test_callable.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DictConfig | ListConfig` is not assignable to parameter `config` with type `SubsetConfig` in function `otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate`",
-      "concise_description": "Argument `DictConfig | ListConfig` is not assignable to parameter `config` with type `SubsetConfig` in function `otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate`",
+      "name": "missing-attribute",
+      "description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `momentum`",
+      "concise_description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `momentum`",
       "severity": "error"
     },
     {
-      "line": 88,
-      "column": 54,
-      "stop_line": 88,
-      "stop_column": 57,
-      "path": "tests/unit/data/test_transform_libs.py",
+      "line": 44,
+      "column": 16,
+      "stop_line": 44,
+      "stop_column": 47,
+      "path": "tests/unit/backend/native/optimizers/test_callable.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DictConfig | ListConfig` is not assignable to parameter `config` with type `SubsetConfig` in function `otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate`",
-      "concise_description": "Argument `DictConfig | ListConfig` is not assignable to parameter `config` with type `SubsetConfig` in function `otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate`",
+      "name": "missing-attribute",
+      "description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `weight_decay`",
+      "concise_description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `weight_decay`",
       "severity": "error"
     },
     {
-      "line": 94,
-      "column": 54,
-      "stop_line": 94,
-      "stop_column": 57,
-      "path": "tests/unit/data/test_transform_libs.py",
+      "line": 58,
+      "column": 16,
+      "stop_line": 58,
+      "stop_column": 37,
+      "path": "tests/unit/backend/native/optimizers/test_callable.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `DictConfig | ListConfig` is not assignable to parameter `config` with type `SubsetConfig` in function `otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate`",
-      "concise_description": "Argument `DictConfig | ListConfig` is not assignable to parameter `config` with type `SubsetConfig` in function `otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate`",
+      "name": "missing-attribute",
+      "description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `lr`",
+      "concise_description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `lr`",
       "severity": "error"
     },
     {
-      "line": 115,
+      "line": 59,
       "column": 16,
-      "stop_line": 115,
-      "stop_column": 37,
-      "path": "tests/unit/data/test_transform_libs.py",
+      "stop_line": 59,
+      "stop_column": 43,
+      "path": "tests/unit/backend/native/optimizers/test_callable.py",
       "code": -2,
-      "name": "bad-return",
-      "description": "Returned type `DictConfig | ListConfig` is not assignable to declared return type `list[dict[str, Any]]`",
-      "concise_description": "Returned type `DictConfig | ListConfig` is not assignable to declared return type `list[dict[str, Any]]`",
+      "name": "missing-attribute",
+      "description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `momentum`",
+      "concise_description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `momentum`",
       "severity": "error"
     },
     {
-      "line": 120,
+      "line": 60,
       "column": 16,
-      "stop_line": 120,
-      "stop_column": 45,
-      "path": "tests/unit/data/test_transform_libs.py",
+      "stop_line": 60,
+      "stop_column": 47,
+      "path": "tests/unit/backend/native/optimizers/test_callable.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `FunctionType` has no attribute `scale`",
-      "concise_description": "Object of class `FunctionType` has no attribute `scale`",
+      "description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `weight_decay`",
+      "concise_description": "Object of class `OptimizerCallableSupportAdaptiveBS` has no attribute `weight_decay`",
       "severity": "error"
     },
     {
-      "line": 121,
+      "line": 67,
       "column": 16,
-      "stop_line": 121,
-      "stop_column": 49,
-      "path": "tests/unit/data/test_transform_libs.py",
+      "stop_line": 67,
+      "stop_column": 37,
+      "path": "tests/unit/backend/native/schedulers/test_warmup_schedulers.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `FunctionType` has no attribute `crop_size`",
-      "concise_description": "Object of class `FunctionType` has no attribute `crop_size`",
+      "description": "Object of class `LRScheduler` has no attribute `monitor`",
+      "concise_description": "Object of class `LRScheduler` has no attribute `monitor`",
       "severity": "error"
     },
     {
-      "line": 122,
+      "line": 80,
       "column": 16,
-      "stop_line": 122,
-      "stop_column": 45,
-      "path": "tests/unit/data/test_transform_libs.py",
+      "stop_line": 80,
+      "stop_column": 37,
+      "path": "tests/unit/backend/native/schedulers/test_warmup_schedulers.py",
       "code": -2,
       "name": "missing-attribute",
-      "description": "Object of class `FunctionType` has no attribute `scale`",
-      "concise_description": "Object of class `FunctionType` has no attribute `scale`",
+      "description": "Object of class `LRScheduler` has no attribute `monitor`",
+      "concise_description": "Object of class `LRScheduler` has no attribute `monitor`",
       "severity": "error"
     },
     {
-      "line": 145,
-      "column": 48,
-      "stop_line": 145,
-      "stop_column": 70,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 220,
+      "column": 18,
+      "stop_line": 220,
+      "stop_column": 27,
+      "path": "tests/unit/backend/native/test_engine.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `BoundingBoxes | ndarray[Unknown, Unknown]` is not assignable to parameter `other` with type `Tensor` in function `torch._C._VariableFunctions.equal`",
-      "concise_description": "Argument `BoundingBoxes | ndarray[Unknown, Unknown]` is not assignable to parameter `other` with type `Tensor` in function `torch._C._VariableFunctions.equal`",
+      "description": "Argument `Literal['MULTI_CLASS_CLS']` is not assignable to parameter `task` with type `OTXTaskType` in function `otx.backend.native.engine.OTXEngine.from_model_name`",
+      "concise_description": "Argument `Literal['MULTI_CLASS_CLS']` is not assignable to parameter `task` with type `OTXTaskType` in function `otx.backend.native.engine.OTXEngine.from_model_name`",
       "severity": "error"
     },
     {
-      "line": 147,
-      "column": 37,
-      "stop_line": 147,
-      "stop_column": 88,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 232,
+      "column": 22,
+      "stop_line": 232,
+      "stop_column": 31,
+      "path": "tests/unit/backend/native/test_engine.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "description": "Argument `Literal['MULTI_CLASS_CLS']` is not assignable to parameter `task` with type `OTXTaskType` in function `otx.backend.native.engine.OTXEngine.from_model_name`",
+      "concise_description": "Argument `Literal['MULTI_CLASS_CLS']` is not assignable to parameter `task` with type `OTXTaskType` in function `otx.backend.native.engine.OTXEngine.from_model_name`",
       "severity": "error"
     },
     {
-      "line": 147,
-      "column": 37,
-      "stop_line": 147,
-      "stop_column": 88,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 26,
+      "column": 23,
+      "stop_line": 26,
+      "stop_column": 89,
+      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])",
       "severity": "error"
     },
     {
-      "line": 147,
-      "column": 90,
-      "stop_line": 147,
-      "stop_column": 94,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 194,
+      "column": 27,
+      "stop_line": 194,
+      "stop_column": 60,
+      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 147,
-      "column": 90,
-      "stop_line": 147,
-      "stop_column": 94,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 196,
+      "column": 59,
+      "stop_line": 196,
+      "stop_column": 82,
+      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
+      "name": "not-iterable",
+      "description": "Type `None` is not iterable",
+      "concise_description": "Type `None` is not iterable",
       "severity": "error"
     },
     {
-      "line": 148,
-      "column": 35,
-      "stop_line": 148,
-      "stop_column": 40,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 215,
+      "column": 27,
+      "stop_line": 215,
+      "stop_column": 60,
+      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `((...) -> Any) | ((ParamSpec(@_)) -> @_) | Unknown` is not assignable to parameter `bboxes1` with type `Tensor` in function `otx.data.transform_libs.utils.overlap_bboxes`",
-      "concise_description": "Argument `((...) -> Any) | ((ParamSpec(@_)) -> @_) | Unknown` is not assignable to parameter `bboxes1` with type `Tensor` in function `otx.data.transform_libs.utils.overlap_bboxes`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 192,
-      "column": 49,
-      "stop_line": 192,
-      "stop_column": 78,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 217,
+      "column": 92,
+      "stop_line": 217,
+      "stop_column": 115,
+      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `tuple` has no attribute `bboxes`",
-      "concise_description": "Object of class `tuple` has no attribute `bboxes`",
+      "name": "bad-argument-type",
+      "description": "Argument `list[Tensor] | None` is not assignable to parameter `iterable` with type `Iterable[Tensor]` in function `enumerate.__new__`\n  Protocol `Iterable` requires attribute `__iter__`",
+      "concise_description": "Argument `list[Tensor] | None` is not assignable to parameter `iterable` with type `Iterable[Tensor]` in function `enumerate.__new__`",
       "severity": "error"
     },
     {
-      "line": 220,
-      "column": 16,
-      "stop_line": 220,
-      "stop_column": 50,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 236,
+      "column": 27,
+      "stop_line": 236,
+      "stop_column": 60,
+      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `tuple` has no attribute `bboxes`",
-      "concise_description": "Object of class `tuple` has no attribute `bboxes`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 238,
-      "column": 39,
-      "stop_line": 238,
-      "stop_column": 45,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 244,
+      "column": 9,
+      "stop_line": 244,
+      "stop_column": 22,
+      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `tuple[Unknown, ...]` is not assignable to parameter `*_inputs` with type `OTXSample` in function `otx.data.transform_libs.torchvision.RandomFlip.forward`",
-      "concise_description": "Argument `tuple[Unknown, ...]` is not assignable to parameter `*_inputs` with type `OTXSample` in function `otx.data.transform_libs.torchvision.RandomFlip.forward`",
+      "description": "Argument `list[Tensor]` is not assignable to parameter `saliency_map` with type `list[ndarray[Unknown, Unknown]]` in function `otx.backend.native.models.utils.xai_utils.process_saliency_maps`",
+      "concise_description": "Argument `list[Tensor]` is not assignable to parameter `saliency_map` with type `list[ndarray[Unknown, Unknown]]` in function `otx.backend.native.models.utils.xai_utils.process_saliency_maps`",
       "severity": "error"
     },
     {
-      "line": 241,
-      "column": 37,
-      "stop_line": 241,
+      "line": 254,
+      "column": 49,
+      "stop_line": 254,
       "stop_column": 50,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `image`",
-      "concise_description": "Object of class `NoneType` has no attribute `image`",
+      "name": "bad-index",
+      "description": "Cannot index into `dict[str, Tensor | ndarray[Unknown, Unknown]]`\n  Argument `Literal[0]` is not assignable to parameter `key` with type `str` in function `dict.__getitem__`",
+      "concise_description": "Cannot index into `dict[str, Tensor | ndarray[Unknown, Unknown]]`",
       "severity": "error"
     },
     {
-      "line": 241,
-      "column": 62,
-      "stop_line": 241,
-      "stop_column": 95,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 255,
+      "column": 48,
+      "stop_line": 255,
+      "stop_column": 49,
+      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `tuple` has no attribute `image`",
-      "concise_description": "Object of class `tuple` has no attribute `image`",
+      "name": "bad-index",
+      "description": "Cannot index into `dict[str, Tensor | ndarray[Unknown, Unknown]]`\n  Argument `Literal[0]` is not assignable to parameter `key` with type `str` in function `dict.__getitem__`",
+      "concise_description": "Cannot index into `dict[str, Tensor | ndarray[Unknown, Unknown]]`",
       "severity": "error"
     },
     {
-      "line": 244,
-      "column": 26,
-      "stop_line": 244,
-      "stop_column": 40,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 256,
+      "column": 54,
+      "stop_line": 256,
+      "stop_column": 55,
+      "path": "tests/unit/backend/native/tools/explain/test_saliency_map_processing.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `bboxes`\nObject of class `OTXSample` has no attribute `bboxes`",
-      "concise_description": "Object of class `NoneType` has no attribute `bboxes`\nObject of class `OTXSample` has no attribute `bboxes`",
+      "name": "bad-index",
+      "description": "Cannot index into `dict[str, Tensor | ndarray[Unknown, Unknown]]`\n  Argument `Literal[0]` is not assignable to parameter `key` with type `str` in function `dict.__getitem__`",
+      "concise_description": "Cannot index into `dict[str, Tensor | ndarray[Unknown, Unknown]]`",
       "severity": "error"
     },
     {
-      "line": 245,
-      "column": 34,
-      "stop_line": 245,
-      "stop_column": 50,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 31,
+      "column": 16,
+      "stop_line": 31,
+      "stop_column": 36,
+      "path": "tests/unit/backend/native/tools/explain/test_xai_algorithms.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `img_info`",
-      "concise_description": "Object of class `NoneType` has no attribute `img_info`",
+      "name": "bad-return",
+      "description": "Returned type `Tensor` is not assignable to declared return type `None`",
+      "concise_description": "Returned type `Tensor` is not assignable to declared return type `None`",
       "severity": "error"
     },
     {
-      "line": 245,
-      "column": 66,
-      "stop_line": 245,
-      "stop_column": 80,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 36,
+      "column": 9,
+      "stop_line": 36,
+      "stop_column": 28,
+      "path": "tests/unit/backend/native/tools/explain/test_xai_algorithms.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `bboxes`\nObject of class `OTXSample` has no attribute `bboxes`",
-      "concise_description": "Object of class `NoneType` has no attribute `bboxes`\nObject of class `OTXSample` has no attribute `bboxes`",
+      "name": "bad-argument-type",
+      "description": "Argument `(_: Unknown) -> None` is not assignable to parameter `head_forward_fn` with type `(FeatureMapType) -> Tensor` in function `otx.backend.native.tools.explain.explain_algo.ReciproCAM.__init__`",
+      "concise_description": "Argument `(_: Unknown) -> None` is not assignable to parameter `head_forward_fn` with type `(FeatureMapType) -> Tensor` in function `otx.backend.native.tools.explain.explain_algo.ReciproCAM.__init__`",
       "severity": "error"
     },
     {
-      "line": 246,
-      "column": 34,
-      "stop_line": 246,
-      "stop_column": 50,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 51,
+      "column": 16,
+      "stop_line": 51,
+      "stop_column": 37,
+      "path": "tests/unit/backend/native/tools/explain/test_xai_algorithms.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `img_info`",
-      "concise_description": "Object of class `NoneType` has no attribute `img_info`",
+      "name": "bad-return",
+      "description": "Returned type `Tensor` is not assignable to declared return type `None`",
+      "concise_description": "Returned type `Tensor` is not assignable to declared return type `None`",
       "severity": "error"
     },
     {
-      "line": 246,
-      "column": 66,
-      "stop_line": 246,
-      "stop_column": 80,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 55,
+      "column": 9,
+      "stop_line": 55,
+      "stop_column": 28,
+      "path": "tests/unit/backend/native/tools/explain/test_xai_algorithms.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `bboxes`\nObject of class `OTXSample` has no attribute `bboxes`",
-      "concise_description": "Object of class `NoneType` has no attribute `bboxes`\nObject of class `OTXSample` has no attribute `bboxes`",
+      "name": "bad-argument-type",
+      "description": "Argument `(_: Unknown) -> None` is not assignable to parameter `head_forward_fn` with type `(FeatureMapType) -> Tensor` in function `otx.backend.native.tools.explain.explain_algo.ViTReciproCAM.__init__`",
+      "concise_description": "Argument `(_: Unknown) -> None` is not assignable to parameter `head_forward_fn` with type `(FeatureMapType) -> Tensor` in function `otx.backend.native.tools.explain.explain_algo.ViTReciproCAM.__init__`",
       "severity": "error"
     },
     {
-      "line": 247,
-      "column": 25,
-      "stop_line": 247,
-      "stop_column": 79,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 99,
+      "column": 29,
+      "stop_line": 99,
+      "stop_column": 80,
+      "path": "tests/unit/backend/native/tools/explain/test_xai_algorithms.py",
       "code": -2,
       "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C._VariableFunctions.all` called with arguments: (bool)\n  Possible overloads:\n  (input: Tensor, *, out: Tensor | None = None) -> Tensor [closest match]\n  (input: Tensor, dim: Size | list[int] | tuple[int, ...] | None = None, keepdim: bool = False, *, out: Tensor | None = None) -> Tensor\n  (input: Tensor, dim: int, keepdim: bool = False, *, out: Tensor | None = None) -> Tensor\n  (input: Tensor, dim: EllipsisType | str | None, keepdim: bool = False, *, out: Tensor | None = None) -> Tensor",
-      "concise_description": "No matching overload found for function `torch._C._VariableFunctions.all` called with arguments: (bool)",
-      "severity": "error"
-    },
-    {
-      "line": 247,
-      "column": 44,
-      "stop_line": 247,
-      "stop_column": 78,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `tuple` has no attribute `bboxes`",
-      "concise_description": "Object of class `tuple` has no attribute `bboxes`",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[10], Literal[10]], ori_shape=tuple[Literal[10], Literal[10]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=Literal[0], img_shape=tuple[Literal[10], Literal[10]], ori_shape=tuple[Literal[10], Literal[10]])",
       "severity": "error"
     },
     {
-      "line": 250,
-      "column": 42,
-      "stop_line": 250,
-      "stop_column": 55,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 102,
+      "column": 39,
+      "stop_line": 106,
+      "stop_column": 14,
+      "path": "tests/unit/backend/native/tools/explain/test_xai_algorithms.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `NoneType` has no attribute `masks`\nObject of class `OTXSample` has no attribute `masks`",
-      "concise_description": "Object of class `NoneType` has no attribute `masks`\nObject of class `OTXSample` has no attribute `masks`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xywh'], canvas_size=tuple[Literal[10], Literal[10]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xywh'], canvas_size=tuple[Literal[10], Literal[10]])",
       "severity": "error"
     },
     {
-      "line": 250,
-      "column": 69,
-      "stop_line": 250,
-      "stop_column": 102,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 23,
+      "column": 31,
+      "stop_line": 23,
+      "stop_column": 35,
+      "path": "tests/unit/backend/native/utils/test_api.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `tuple` has no attribute `masks`",
-      "concise_description": "Object of class `tuple` has no attribute `masks`",
+      "name": "bad-argument-type",
+      "description": "Argument `str` is not assignable to parameter `task` with type `OTXTaskType | None` in function `otx.backend.native.cli.utils.list_models`",
+      "concise_description": "Argument `str` is not assignable to parameter `task` with type `OTXTaskType | None` in function `otx.backend.native.cli.utils.list_models`",
       "severity": "error"
     },
     {
-      "line": 468,
-      "column": 36,
-      "stop_line": 468,
-      "stop_column": 39,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 74,
+      "column": 16,
+      "stop_line": 74,
+      "stop_column": 23,
+      "path": "tests/unit/backend/native/utils/test_instantiators.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Literal[640]` is not assignable to parameter `img_scale` with type `list[int] | tuple[int, int]` in function `otx.data.transform_libs.torchvision.CachedMosaic.__init__`",
-      "concise_description": "Argument `Literal[640]` is not assignable to parameter `img_scale` with type `list[int] | tuple[int, int]` in function `otx.data.transform_libs.torchvision.CachedMosaic.__init__`",
+      "description": "Argument `list[partial[Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
+      "concise_description": "Argument `list[partial[Unknown]] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
       "severity": "error"
     },
     {
-      "line": 481,
-      "column": 39,
-      "stop_line": 481,
-      "stop_column": 102,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 75,
+      "column": 12,
+      "stop_line": 75,
+      "stop_column": 22,
+      "path": "tests/unit/backend/native/utils/test_instantiators.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`list[tuple[Unknown, ...]]` is not assignable to attribute `results_cache` with type `list[OTXSample]`",
-      "concise_description": "`list[tuple[Unknown, ...]]` is not assignable to attribute `results_cache` with type `list[OTXSample]`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 490,
-      "column": 43,
-      "stop_line": 490,
-      "stop_column": 76,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 76,
+      "column": 12,
+      "stop_line": 76,
+      "stop_column": 22,
+      "path": "tests/unit/backend/native/utils/test_instantiators.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `tuple` has no attribute `image`",
-      "concise_description": "Object of class `tuple` has no attribute `image`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 491,
-      "column": 44,
-      "stop_line": 491,
-      "stop_column": 78,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 77,
+      "column": 12,
+      "stop_line": 77,
+      "stop_column": 22,
+      "path": "tests/unit/backend/native/utils/test_instantiators.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `tuple` has no attribute `bboxes`",
-      "concise_description": "Object of class `tuple` has no attribute `bboxes`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 500,
-      "column": 39,
-      "stop_line": 500,
-      "stop_column": 51,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 78,
+      "column": 12,
+      "stop_line": 78,
+      "stop_column": 22,
+      "path": "tests/unit/backend/native/utils/test_instantiators.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`list[tuple[Unknown, ...]]` is not assignable to attribute `results_cache` with type `list[OTXSample]`",
-      "concise_description": "`list[tuple[Unknown, ...]]` is not assignable to attribute `results_cache` with type `list[OTXSample]`",
+      "name": "unsupported-operation",
+      "description": "`None` is not subscriptable",
+      "concise_description": "`None` is not subscriptable",
       "severity": "error"
     },
     {
-      "line": 522,
-      "column": 35,
-      "stop_line": 522,
-      "stop_column": 38,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 96,
+      "column": 12,
+      "stop_line": 96,
+      "stop_column": 31,
+      "path": "tests/unit/backend/native/utils/test_instantiators.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Literal[640]` is not assignable to parameter `img_scale` with type `list[int] | tuple[int, int]` in function `otx.data.transform_libs.torchvision.CachedMixUp.__init__`",
-      "concise_description": "Argument `Literal[640]` is not assignable to parameter `img_scale` with type `list[int] | tuple[int, int]` in function `otx.data.transform_libs.torchvision.CachedMixUp.__init__`",
+      "name": "missing-attribute",
+      "description": "Object of class `Sampler` has no attribute `num_samples`",
+      "concise_description": "Object of class `Sampler` has no attribute `num_samples`",
       "severity": "error"
     },
     {
-      "line": 535,
-      "column": 38,
-      "stop_line": 535,
-      "stop_column": 100,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 97,
+      "column": 12,
+      "stop_line": 97,
+      "stop_column": 31,
+      "path": "tests/unit/backend/native/utils/test_instantiators.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`list[tuple[Unknown, ...]]` is not assignable to attribute `results_cache` with type `list[OTXSample]`",
-      "concise_description": "`list[tuple[Unknown, ...]]` is not assignable to attribute `results_cache` with type `list[OTXSample]`",
+      "name": "missing-attribute",
+      "description": "Object of class `Sampler` has no attribute `replacement`",
+      "concise_description": "Object of class `Sampler` has no attribute `replacement`",
       "severity": "error"
     },
     {
-      "line": 544,
-      "column": 43,
-      "stop_line": 544,
-      "stop_column": 76,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 19,
+      "column": 29,
+      "stop_line": 19,
+      "stop_column": 57,
+      "path": "tests/unit/backend/native/utils/test_mask_utils.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `tuple` has no attribute `image`",
-      "concise_description": "Object of class `tuple` has no attribute `image`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C._VariableFunctions.randint` called with arguments: (low=Literal[0], high=Literal[2], size=tuple[Tensor, Tensor])\n  Possible overloads:\n  (low: int, high: int, size: _size, *, generator: Generator | None = None, dtype: dtype | None = None, device: device | int | str | None = None, requires_grad: bool = False, pin_memory: bool = False) -> Tensor [closest match]\n  (high: int, size: _size, *, generator: Generator | None = None, dtype: dtype | None = None, device: device | int | str | None = None, requires_grad: bool = False, pin_memory: bool = False) -> Tensor\n  (high: SymInt | int, size: Sequence[SymInt | int], *, generator: Generator | None, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (high: SymInt | int, size: Sequence[SymInt | int], *, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (low: SymInt | int, high: SymInt | int, size: Sequence[SymInt | int], *, generator: Generator | None, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor\n  (low: SymInt | int, high: SymInt | int, size: Sequence[SymInt | int], *, out: Tensor | None = None, dtype: dtype | None = None, layout: layout | None = None, device: device | int | str | None = None, pin_memory: bool | None = False, requires_grad: bool | None = False) -> Tensor",
+      "concise_description": "No matching overload found for function `torch._C._VariableFunctions.randint` called with arguments: (low=Literal[0], high=Literal[2], size=tuple[Tensor, Tensor])",
       "severity": "error"
     },
     {
-      "line": 545,
-      "column": 44,
-      "stop_line": 545,
-      "stop_column": 78,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 21,
+      "column": 43,
+      "stop_line": 21,
+      "stop_column": 74,
+      "path": "tests/unit/backend/native/utils/test_mask_utils.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `tuple` has no attribute `bboxes`",
-      "concise_description": "Object of class `tuple` has no attribute `bboxes`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `pycocotools.mask.frPyObjects` called with arguments: (dict[Unknown, Unknown], *Unknown)\n  Possible overloads:\n  (pyobj: list[list[int]] | list[_EncodedRLE] | ndarray[tuple[int, ...], dtype[unsignedinteger[_32Bit]]], h: int, w: int) -> list[_EncodedRLE] [closest match]\n  (pyobj: list[int] | _EncodedRLE, h: int, w: int) -> _EncodedRLE",
+      "concise_description": "No matching overload found for function `pycocotools.mask.frPyObjects` called with arguments: (dict[Unknown, Unknown], *Unknown)",
       "severity": "error"
     },
     {
-      "line": 554,
-      "column": 38,
-      "stop_line": 554,
-      "stop_column": 46,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 17,
+      "column": 24,
+      "stop_line": 17,
+      "stop_column": 50,
+      "path": "tests/unit/backend/openvino/conftest.py",
       "code": -2,
-      "name": "bad-assignment",
-      "description": "`list[tuple[Unknown, ...]]` is not assignable to attribute `results_cache` with type `list[OTXSample]`",
-      "concise_description": "`list[tuple[Unknown, ...]]` is not assignable to attribute `results_cache` with type `list[OTXSample]`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `openvino._ov_api.Model.__init__` called with arguments: (Parameter, list[Parameter])\n  Possible overloads:\n  (other: Model) -> None\n  (results: Sequence[Result], sinks: Sequence[Node], parameters: Sequence[Parameter], name: str = '') -> None\n  (results: Sequence[Result], parameters: Sequence[Parameter], name: str = '') -> None\n  (results: Sequence[Node], parameters: Sequence[Parameter], name: str = '') -> None\n  (result: Node, parameters: Sequence[Parameter], name: str = '') -> None [closest match]\n  (results: Sequence[Output], parameters: Sequence[Parameter], name: str = '') -> None\n  (results: Sequence[Output], sinks: Sequence[Node], parameters: Sequence[Parameter], name: str = '') -> None\n  (results: Sequence[Output], sinks: Sequence[Output], parameters: Sequence[Parameter], name: str = '') -> None\n  (results: Sequence[Output], sinks: Sequence[Output], parameters: Sequence[Parameter], variables: Sequence[Variable], name: str = '') -> None\n  (results: Sequence[Result], sinks: Sequence[Output], parameters: Sequence[Parameter], name: str = '') -> None\n  (results: Sequence[Result], sinks: Sequence[Output], parameters: Sequence[Parameter], variables: Sequence[Variable], name: str = '') -> None\n  (results: Sequence[Result], sinks: Sequence[Node], parameters: Sequence[Parameter], variables: Sequence[Variable], name: str = '') -> None\n  (results: Sequence[Output], sinks: Sequence[Node], parameters: Sequence[Parameter], variables: Sequence[Variable], name: str = '') -> None\n  (results: Sequence[Result], parameters: Sequence[Parameter], variables: Sequence[Variable], name: str = '') -> None\n  (results: Sequence[Output], parameters: Sequence[Parameter], variables: Sequence[Variable], name: str = '') -> None",
+      "concise_description": "No matching overload found for function `openvino._ov_api.Model.__init__` called with arguments: (Parameter, list[Parameter])",
       "severity": "error"
     },
     {
-      "line": 671,
-      "column": 40,
-      "stop_line": 671,
-      "stop_column": 53,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 119,
+      "column": 22,
+      "stop_line": 119,
+      "stop_column": 26,
+      "path": "tests/unit/cli/utils/test_jsonargparse.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `tuple` has no attribute `bboxes`",
-      "concise_description": "Object of class `tuple` has no attribute `bboxes`",
+      "name": "bad-argument-type",
+      "description": "Argument `None` is not assignable to parameter `self` with type `ActionConfigFile` in function `otx.cli.utils.jsonargparse.apply_config`",
+      "concise_description": "Argument `None` is not assignable to parameter `self` with type `ActionConfigFile` in function `otx.cli.utils.jsonargparse.apply_config`",
       "severity": "error"
     },
     {
-      "line": 672,
-      "column": 40,
-      "stop_line": 672,
-      "stop_column": 53,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 408,
+      "column": 32,
+      "stop_line": 408,
+      "stop_column": 41,
+      "path": "tests/unit/cli/utils/test_jsonargparse.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `tuple` has no attribute `bboxes`",
-      "concise_description": "Object of class `tuple` has no attribute `bboxes`",
+      "name": "bad-argument-type",
+      "description": "Argument `type[test_get_short_docstring.Component]` is not assignable to parameter `component` with type `TypeVar` in function `otx.cli.utils.jsonargparse.get_short_docstring`",
+      "concise_description": "Argument `type[test_get_short_docstring.Component]` is not assignable to parameter `component` with type `TypeVar` in function `otx.cli.utils.jsonargparse.get_short_docstring`",
       "severity": "error"
     },
     {
-      "line": 708,
-      "column": 26,
-      "stop_line": 708,
-      "stop_column": 50,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 409,
+      "column": 32,
+      "stop_line": 409,
+      "stop_column": 48,
+      "path": "tests/unit/cli/utils/test_jsonargparse.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `list[int | list[int] | tuple[int, int]]` is not assignable to parameter `scale` with type `Sequence[int | tuple[int, int]]` in function `otx.data.transform_libs.torchvision.RandomResize.__init__`",
-      "concise_description": "Argument `list[int | list[int] | tuple[int, int]]` is not assignable to parameter `scale` with type `Sequence[int | tuple[int, int]]` in function `otx.data.transform_libs.torchvision.RandomResize.__init__`",
+      "description": "Argument `(self: test_get_short_docstring.Component) -> None` is not assignable to parameter `component` with type `TypeVar` in function `otx.cli.utils.jsonargparse.get_short_docstring`",
+      "concise_description": "Argument `(self: test_get_short_docstring.Component) -> None` is not assignable to parameter `component` with type `TypeVar` in function `otx.cli.utils.jsonargparse.get_short_docstring`",
       "severity": "error"
     },
     {
-      "line": 775,
-      "column": 34,
-      "stop_line": 775,
-      "stop_column": 43,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 410,
+      "column": 32,
+      "stop_line": 410,
+      "stop_column": 48,
+      "path": "tests/unit/cli/utils/test_jsonargparse.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `tuple[float | int]` is not assignable to parameter `crop_size` with type `tuple[int, int]` in function `otx.data.transform_libs.torchvision.RandomCrop.__init__`",
-      "concise_description": "Argument `tuple[float | int]` is not assignable to parameter `crop_size` with type `tuple[int, int]` in function `otx.data.transform_libs.torchvision.RandomCrop.__init__`",
+      "description": "Argument `type[test_get_short_docstring.WithoutDocstring]` is not assignable to parameter `component` with type `TypeVar` in function `otx.cli.utils.jsonargparse.get_short_docstring`",
+      "concise_description": "Argument `type[test_get_short_docstring.WithoutDocstring]` is not assignable to parameter `component` with type `TypeVar` in function `otx.cli.utils.jsonargparse.get_short_docstring`",
       "severity": "error"
     },
     {
-      "line": 780,
-      "column": 42,
-      "stop_line": 780,
-      "stop_column": 51,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 56,
+      "column": 20,
+      "stop_line": 56,
+      "stop_column": 104,
+      "path": "tests/unit/data/conftest.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `tuple[float | int]` is not assignable to parameter `crop_size` with type `tuple[int, int]` in function `otx.data.transform_libs.torchvision.RandomCrop.__init__`",
-      "concise_description": "Argument `tuple[float | int]` is not assignable to parameter `crop_size` with type `tuple[int, int]` in function `otx.data.transform_libs.torchvision.RandomCrop.__init__`",
+      "description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
+      "concise_description": "Argument `list[list[str]]` is not assignable to parameter `all_groups` with type `list[tuple[str, ...]]` in function `otx.types.label.HLabelInfo.__init__`",
       "severity": "error"
     },
     {
-      "line": 800,
-      "column": 42,
-      "stop_line": 800,
-      "stop_column": 52,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 212,
+      "column": 30,
+      "stop_line": 212,
+      "stop_column": 42,
+      "path": "tests/unit/data/dataset/test_base.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `tuple[float, float]` is not assignable to parameter `crop_size` with type `tuple[int, int]` in function `otx.data.transform_libs.torchvision.RandomCrop.__init__`",
-      "concise_description": "Argument `tuple[float, float]` is not assignable to parameter `crop_size` with type `tuple[int, int]` in function `otx.data.transform_libs.torchvision.RandomCrop.__init__`",
+      "name": "bad-assignment",
+      "description": "`Literal['not_a_list']` is not assignable to attribute `transforms` with type `((...) -> Unknown) | CPUAugmentationPipeline | Compose | dict[str, ((...) -> Unknown) | Compose | list[(...) -> Unknown]] | list[(...) -> Unknown] | None`",
+      "concise_description": "`Literal['not_a_list']` is not assignable to attribute `transforms` with type `((...) -> Unknown) | CPUAugmentationPipeline | Compose | dict[str, ((...) -> Unknown) | Compose | list[(...) -> Unknown]] | list[(...) -> Unknown] | None`",
       "severity": "error"
     },
     {
-      "line": 830,
-      "column": 46,
-      "stop_line": 830,
-      "stop_column": 82,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 254,
+      "column": 20,
+      "stop_line": 254,
+      "stop_column": 56,
+      "path": "tests/unit/data/dataset/test_base.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "missing-attribute",
+      "description": "Object of class `FunctionType` has no attribute `call_count`",
+      "concise_description": "Object of class `FunctionType` has no attribute `call_count`",
       "severity": "error"
     },
     {
-      "line": 830,
-      "column": 46,
-      "stop_line": 830,
-      "stop_column": 82,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 106,
+      "column": 39,
+      "stop_line": 106,
+      "stop_column": 43,
+      "path": "tests/unit/data/dataset/test_mixins.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
-      "severity": "error"
+      "name": "unnecessary-comparison",
+      "description": "Identity comparison `True is True` is always True",
+      "concise_description": "Identity comparison `True is True` is always True",
+      "severity": "warn"
     },
     {
-      "line": 830,
-      "column": 84,
-      "stop_line": 830,
-      "stop_column": 88,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 53,
+      "column": 53,
+      "stop_line": 53,
+      "stop_column": 56,
+      "path": "tests/unit/data/dataset/test_tile.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "bad-argument-type",
+      "description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
+      "concise_description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
       "severity": "error"
     },
     {
-      "line": 830,
-      "column": 84,
-      "stop_line": 830,
-      "stop_column": 88,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 64,
+      "column": 53,
+      "stop_line": 64,
+      "stop_column": 56,
+      "path": "tests/unit/data/dataset/test_tile.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
+      "name": "bad-argument-type",
+      "description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
+      "concise_description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
       "severity": "error"
     },
     {
-      "line": 846,
-      "column": 45,
-      "stop_line": 846,
-      "stop_column": 81,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 71,
+      "column": 53,
+      "stop_line": 71,
+      "stop_column": 56,
+      "path": "tests/unit/data/dataset/test_tile.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
+      "concise_description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
       "severity": "error"
     },
     {
-      "line": 846,
-      "column": 45,
-      "stop_line": 846,
-      "stop_column": 81,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 78,
+      "column": 53,
+      "stop_line": 78,
+      "stop_column": 56,
+      "path": "tests/unit/data/dataset/test_tile.py",
       "code": -2,
       "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
+      "description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
+      "concise_description": "Argument `DummyTileConfig` is not assignable to parameter `tile_config` with type `TileConfig` in function `otx.data.dataset.tile.OTXTileDatasetFactory.create`",
       "severity": "error"
     },
     {
-      "line": 846,
-      "column": 83,
-      "stop_line": 846,
-      "stop_column": 87,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 19,
+      "column": 31,
+      "stop_line": 19,
+      "stop_column": 76,
+      "path": "tests/unit/data/entity/conftest.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, dtype=dtype)\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None [closest match]\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (Tensor, dtype=dtype)",
       "severity": "error"
     },
     {
-      "line": 846,
-      "column": 83,
-      "stop_line": 846,
-      "stop_column": 87,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 91,
+      "column": 66,
+      "stop_line": 91,
+      "stop_column": 89,
+      "path": "tests/unit/data/entity/test_base.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
+      "name": "missing-attribute",
+      "description": "Object of class `Tensor` has no attribute `img_shape`",
+      "concise_description": "Object of class `Tensor` has no attribute `img_shape`",
       "severity": "error"
     },
     {
-      "line": 864,
-      "column": 45,
-      "stop_line": 864,
-      "stop_column": 64,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 25,
+      "column": 40,
+      "stop_line": 25,
+      "stop_column": 95,
+      "path": "tests/unit/data/entity/test_torch.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(...) -> Any` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_idx=int, img_shape=tuple[Literal[224], Literal[224]], ori_shape=tuple[Literal[224], Literal[224]])",
       "severity": "error"
     },
     {
-      "line": 864,
-      "column": 45,
-      "stop_line": 864,
-      "stop_column": 64,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 26,
+      "column": 53,
+      "stop_line": 30,
+      "stop_column": 14,
+      "path": "tests/unit/data/entity/test_torch.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Argument `Tensor` is not assignable to parameter `fn` with type `(ParamSpec(@_)) -> @_` in function `torch._dynamo.decorators.wrap`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xywh'], canvas_size=tuple[Literal[224], Literal[224]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xywh'], canvas_size=tuple[Literal[224], Literal[224]])",
       "severity": "error"
     },
     {
-      "line": 864,
-      "column": 66,
-      "stop_line": 864,
-      "stop_column": 70,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 37,
+      "column": 20,
+      "stop_line": 37,
+      "stop_column": 40,
+      "path": "tests/unit/data/entity/test_torch.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.eval_frame.DisableContext.__call__`",
+      "name": "bad-argument-type",
+      "description": "Argument `Sequence[ImageInfo | None] | None` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
+      "concise_description": "Argument `Sequence[ImageInfo | None] | None` is not assignable to parameter `obj` with type `Sized` in function `len`",
       "severity": "error"
     },
     {
-      "line": 864,
-      "column": 66,
-      "stop_line": 864,
-      "stop_column": 70,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 20,
+      "column": 18,
+      "stop_line": 20,
+      "stop_column": 65,
+      "path": "tests/unit/data/entity/test_utils.py",
       "code": -2,
-      "name": "unexpected-keyword",
-      "description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
-      "concise_description": "Unexpected keyword argument `like` in function `torch._dynamo.decorators.wrap`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[3], Literal[2]], img_idx=Literal[0], ori_shape=tuple[Literal[2], Literal[2]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[3], Literal[2]], img_idx=Literal[0], ori_shape=tuple[Literal[2], Literal[2]])",
       "severity": "error"
     },
     {
-      "line": 872,
-      "column": 47,
-      "stop_line": 872,
-      "stop_column": 64,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 21,
+      "column": 18,
+      "stop_line": 21,
+      "stop_column": 65,
+      "path": "tests/unit/data/entity/test_utils.py",
       "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `((...) -> Any) | ((...) -> Unknown) | Unknown` is not assignable to parameter `obj` with type `Sized` in function `len`\n  Protocol `Sized` requires attribute `__len__`",
-      "concise_description": "Argument `((...) -> Any) | ((...) -> Unknown) | Unknown` is not assignable to parameter `obj` with type `Sized` in function `len`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[2], Literal[3]], img_idx=Literal[1], ori_shape=tuple[Literal[2], Literal[3]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[2], Literal[3]], img_idx=Literal[1], ori_shape=tuple[Literal[2], Literal[3]])",
       "severity": "error"
     },
     {
-      "line": 985,
-      "column": 27,
-      "stop_line": 985,
-      "stop_column": 52,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 22,
+      "column": 18,
+      "stop_line": 22,
+      "stop_column": 65,
+      "path": "tests/unit/data/entity/test_utils.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `ndarray` has no attribute `clone`",
-      "concise_description": "Object of class `ndarray` has no attribute `clone`",
+      "name": "no-matching-overload",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[2], Literal[4]], img_idx=Literal[2], ori_shape=tuple[Literal[2], Literal[4]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[2], Literal[4]], img_idx=Literal[2], ori_shape=tuple[Literal[2], Literal[4]])",
       "severity": "error"
     },
     {
-      "line": 1022,
-      "column": 49,
-      "stop_line": 1026,
-      "stop_column": 10,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 23,
+      "column": 18,
+      "stop_line": 23,
+      "stop_column": 87,
+      "path": "tests/unit/data/entity/test_utils.py",
       "code": -2,
       "name": "no-matching-overload",
-      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xyxy'], canvas_size=tuple[Literal[64], Literal[128]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
-      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (data=Tensor, format=Literal['xyxy'], canvas_size=tuple[Literal[64], Literal[128]])",
+      "description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[2], Literal[3]], img_idx=Literal[3], ori_shape=tuple[Literal[1], Literal[2]], padding=tuple[Literal[0], Literal[0], Literal[1], Literal[1]])\n  Possible overloads:\n  (*args: Any, *, device: device | int | str | None = None) -> None\n  (storage: Storage) -> None\n  (other: Tensor) -> None\n  (size: _size, *, device: device | int | str | None = None) -> None [closest match]",
+      "concise_description": "No matching overload found for function `torch._C.TensorBase.__init__` called with arguments: (img_shape=tuple[Literal[2], Literal[3]], img_idx=Literal[3], ori_shape=tuple[Literal[1], Literal[2]], padding=tuple[Literal[0], Literal[0], Literal[1], Literal[1]])",
       "severity": "error"
     },
     {
-      "line": 1165,
-      "column": 9,
-      "stop_line": 1165,
-      "stop_column": 33,
-      "path": "tests/unit/data/transform_libs/test_torchvision.py",
+      "line": 166,
+      "column": 16,
+      "stop_line": 166,
+      "stop_column": 19,
+      "path": "tests/unit/data/test_module.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `DetectionSample` has no attribute `custom_attr`",
-      "concise_description": "Object of class `DetectionSample` has no attribute `custom_attr`",
+      "name": "bad-return",
+      "description": "Returned type `DictConfig | ListConfig` is not assignable to declared return type `DictConfig`",
+      "concise_description": "Returned type `DictConfig | ListConfig` is not assignable to declared return type `DictConfig`",
       "severity": "error"
     },
     {
-      "line": 73,
-      "column": 5,
-      "stop_line": 73,
-      "stop_column": 15,
-      "path": "tests/unit/data/utils/test_utils.py",
+      "line": 205,
+      "column": 35,
+      "stop_line": 205,
+      "stop_column": 43,
+      "path": "tests/unit/data/test_module.py",
       "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `ImageFromBytes` has no attribute `path`",
-      "concise_description": "Object of class `ImageFromBytes` has no attribute `path`",
+      "name": "not-a-type",
+      "description": "Expected a type form, got instance of `(obj: object, /) -> TypeIs[(...) -> object]`",
+      "concise_description": "Expected a type form, got instance of `(obj: object, /) -> TypeIs[(...) -> object]`",
       "severity": "error"
     },
     {
@@ -25249,9 +21769,9 @@
       "severity": "error"
     },
     {
-      "line": 429,
+      "line": 428,
       "column": 59,
-      "stop_line": 429,
+      "stop_line": 428,
       "stop_column": 72,
       "path": "tests/unit/metrics/test_detection_threshold_logic.py",
       "code": -2,
@@ -25261,9 +21781,9 @@
       "severity": "error"
     },
     {
-      "line": 429,
+      "line": 428,
       "column": 84,
-      "stop_line": 429,
+      "stop_line": 428,
       "stop_column": 97,
       "path": "tests/unit/metrics/test_detection_threshold_logic.py",
       "code": -2,
@@ -25273,9 +21793,9 @@
       "severity": "error"
     },
     {
-      "line": 432,
+      "line": 431,
       "column": 45,
-      "stop_line": 437,
+      "stop_line": 436,
       "stop_column": 22,
       "path": "tests/unit/metrics/test_detection_threshold_logic.py",
       "code": -2,
@@ -25297,9 +21817,9 @@
       "severity": "error"
     },
     {
-      "line": 43,
+      "line": 41,
       "column": 67,
-      "stop_line": 43,
+      "stop_line": 41,
       "stop_column": 84,
       "path": "tests/unit/tools/test_auto_configurator.py",
       "code": -2,
@@ -25309,9 +21829,9 @@
       "severity": "error"
     },
     {
-      "line": 58,
+      "line": 56,
       "column": 67,
-      "stop_line": 58,
+      "stop_line": 56,
       "stop_column": 84,
       "path": "tests/unit/tools/test_auto_configurator.py",
       "code": -2,
@@ -25321,9 +21841,9 @@
       "severity": "error"
     },
     {
-      "line": 63,
+      "line": 61,
       "column": 72,
-      "stop_line": 63,
+      "stop_line": 61,
       "stop_column": 89,
       "path": "tests/unit/tools/test_auto_configurator.py",
       "code": -2,
@@ -25333,9 +21853,9 @@
       "severity": "error"
     },
     {
-      "line": 165,
+      "line": 163,
       "column": 72,
-      "stop_line": 165,
+      "stop_line": 163,
       "stop_column": 83,
       "path": "tests/unit/tools/test_auto_configurator.py",
       "code": -2,
@@ -25344,294 +21864,6 @@
       "concise_description": "Argument `Literal['DETECTION']` is not assignable to parameter `task` with type `OTXTaskType | None` in function `otx.tools.auto_configurator.AutoConfigurator.__init__`",
       "severity": "error"
     },
-    {
-      "line": 111,
-      "column": 20,
-      "stop_line": 111,
-      "stop_column": 50,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `train_subset`\nObject of class `Path` has no attribute `train_subset`\nObject of class `str` has no attribute `train_subset`",
-      "concise_description": "Object of class `PathLike` has no attribute `train_subset`\nObject of class `Path` has no attribute `train_subset`\nObject of class `str` has no attribute `train_subset`",
-      "severity": "error"
-    },
-    {
-      "line": 112,
-      "column": 16,
-      "stop_line": 112,
-      "stop_column": 50,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `train_dataloader`\nObject of class `Path` has no attribute `train_dataloader`\nObject of class `str` has no attribute `train_dataloader`",
-      "concise_description": "Object of class `PathLike` has no attribute `train_dataloader`\nObject of class `Path` has no attribute `train_dataloader`\nObject of class `str` has no attribute `train_dataloader`",
-      "severity": "error"
-    },
-    {
-      "line": 113,
-      "column": 20,
-      "stop_line": 113,
-      "stop_column": 54,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `train_dataloader`\nObject of class `Path` has no attribute `train_dataloader`\nObject of class `str` has no attribute `train_dataloader`",
-      "concise_description": "Object of class `PathLike` has no attribute `train_dataloader`\nObject of class `Path` has no attribute `train_dataloader`\nObject of class `str` has no attribute `train_dataloader`",
-      "severity": "error"
-    },
-    {
-      "line": 173,
-      "column": 20,
-      "stop_line": 173,
-      "stop_column": 50,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `train_subset`\nObject of class `Path` has no attribute `train_subset`\nObject of class `str` has no attribute `train_subset`",
-      "concise_description": "Object of class `PathLike` has no attribute `train_subset`\nObject of class `Path` has no attribute `train_subset`\nObject of class `str` has no attribute `train_subset`",
-      "severity": "error"
-    },
-    {
-      "line": 174,
-      "column": 16,
-      "stop_line": 174,
-      "stop_column": 50,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `train_dataloader`\nObject of class `Path` has no attribute `train_dataloader`\nObject of class `str` has no attribute `train_dataloader`",
-      "concise_description": "Object of class `PathLike` has no attribute `train_dataloader`\nObject of class `Path` has no attribute `train_dataloader`\nObject of class `str` has no attribute `train_dataloader`",
-      "severity": "error"
-    },
-    {
-      "line": 176,
-      "column": 17,
-      "stop_line": 176,
-      "stop_column": 51,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `train_dataloader`\nObject of class `Path` has no attribute `train_dataloader`\nObject of class `str` has no attribute `train_dataloader`",
-      "concise_description": "Object of class `PathLike` has no attribute `train_dataloader`\nObject of class `Path` has no attribute `train_dataloader`\nObject of class `str` has no attribute `train_dataloader`",
-      "severity": "error"
-    },
-    {
-      "line": 192,
-      "column": 20,
-      "stop_line": 192,
-      "stop_column": 50,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `train_subset`\nObject of class `Path` has no attribute `train_subset`\nObject of class `str` has no attribute `train_subset`",
-      "concise_description": "Object of class `PathLike` has no attribute `train_subset`\nObject of class `Path` has no attribute `train_subset`\nObject of class `str` has no attribute `train_subset`",
-      "severity": "error"
-    },
-    {
-      "line": 193,
-      "column": 16,
-      "stop_line": 193,
-      "stop_column": 50,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `train_dataloader`\nObject of class `Path` has no attribute `train_dataloader`\nObject of class `str` has no attribute `train_dataloader`",
-      "concise_description": "Object of class `PathLike` has no attribute `train_dataloader`\nObject of class `Path` has no attribute `train_dataloader`\nObject of class `str` has no attribute `train_dataloader`",
-      "severity": "error"
-    },
-    {
-      "line": 194,
-      "column": 20,
-      "stop_line": 194,
-      "stop_column": 54,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `train_dataloader`\nObject of class `Path` has no attribute `train_dataloader`\nObject of class `str` has no attribute `train_dataloader`",
-      "concise_description": "Object of class `PathLike` has no attribute `train_dataloader`\nObject of class `Path` has no attribute `train_dataloader`\nObject of class `str` has no attribute `train_dataloader`",
-      "severity": "error"
-    },
-    {
-      "line": 196,
-      "column": 17,
-      "stop_line": 196,
-      "stop_column": 49,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `val_dataloader`\nObject of class `Path` has no attribute `val_dataloader`\nObject of class `str` has no attribute `val_dataloader`",
-      "concise_description": "Object of class `PathLike` has no attribute `val_dataloader`\nObject of class `Path` has no attribute `val_dataloader`\nObject of class `str` has no attribute `val_dataloader`",
-      "severity": "error"
-    },
-    {
-      "line": 210,
-      "column": 16,
-      "stop_line": 210,
-      "stop_column": 43,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `data_root`\nObject of class `Path` has no attribute `data_root`\nObject of class `str` has no attribute `data_root`",
-      "concise_description": "Object of class `PathLike` has no attribute `data_root`\nObject of class `Path` has no attribute `data_root`\nObject of class `str` has no attribute `data_root`",
-      "severity": "error"
-    },
-    {
-      "line": 211,
-      "column": 16,
-      "stop_line": 211,
-      "stop_column": 46,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `train_subset`\nObject of class `Path` has no attribute `train_subset`\nObject of class `str` has no attribute `train_subset`",
-      "concise_description": "Object of class `PathLike` has no attribute `train_subset`\nObject of class `Path` has no attribute `train_subset`\nObject of class `str` has no attribute `train_subset`",
-      "severity": "error"
-    },
-    {
-      "line": 212,
-      "column": 16,
-      "stop_line": 212,
-      "stop_column": 44,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `val_subset`\nObject of class `Path` has no attribute `val_subset`\nObject of class `str` has no attribute `val_subset`",
-      "concise_description": "Object of class `PathLike` has no attribute `val_subset`\nObject of class `Path` has no attribute `val_subset`\nObject of class `str` has no attribute `val_subset`",
-      "severity": "error"
-    },
-    {
-      "line": 213,
-      "column": 16,
-      "stop_line": 213,
-      "stop_column": 45,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `test_subset`\nObject of class `Path` has no attribute `test_subset`\nObject of class `str` has no attribute `test_subset`",
-      "concise_description": "Object of class `PathLike` has no attribute `test_subset`\nObject of class `Path` has no attribute `test_subset`\nObject of class `str` has no attribute `test_subset`",
-      "severity": "error"
-    },
-    {
-      "line": 214,
-      "column": 16,
-      "stop_line": 214,
-      "stop_column": 46,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `train_subset`\nObject of class `Path` has no attribute `train_subset`\nObject of class `str` has no attribute `train_subset`",
-      "concise_description": "Object of class `PathLike` has no attribute `train_subset`\nObject of class `Path` has no attribute `train_subset`\nObject of class `str` has no attribute `train_subset`",
-      "severity": "error"
-    },
-    {
-      "line": 215,
-      "column": 16,
-      "stop_line": 215,
-      "stop_column": 44,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `val_subset`\nObject of class `Path` has no attribute `val_subset`\nObject of class `str` has no attribute `val_subset`",
-      "concise_description": "Object of class `PathLike` has no attribute `val_subset`\nObject of class `Path` has no attribute `val_subset`\nObject of class `str` has no attribute `val_subset`",
-      "severity": "error"
-    },
-    {
-      "line": 216,
-      "column": 16,
-      "stop_line": 216,
-      "stop_column": 45,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `test_subset`\nObject of class `Path` has no attribute `test_subset`\nObject of class `str` has no attribute `test_subset`",
-      "concise_description": "Object of class `PathLike` has no attribute `test_subset`\nObject of class `Path` has no attribute `test_subset`\nObject of class `str` has no attribute `test_subset`",
-      "severity": "error"
-    },
-    {
-      "line": 217,
-      "column": 20,
-      "stop_line": 217,
-      "stop_column": 49,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `tile_config`\nObject of class `Path` has no attribute `tile_config`\nObject of class `str` has no attribute `tile_config`",
-      "concise_description": "Object of class `PathLike` has no attribute `tile_config`\nObject of class `Path` has no attribute `tile_config`\nObject of class `str` has no attribute `tile_config`",
-      "severity": "error"
-    },
-    {
-      "line": 218,
-      "column": 16,
-      "stop_line": 218,
-      "stop_column": 45,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `tile_config`\nObject of class `Path` has no attribute `tile_config`\nObject of class `str` has no attribute `tile_config`",
-      "concise_description": "Object of class `PathLike` has no attribute `tile_config`\nObject of class `Path` has no attribute `tile_config`\nObject of class `str` has no attribute `tile_config`",
-      "severity": "error"
-    },
-    {
-      "line": 219,
-      "column": 16,
-      "stop_line": 219,
-      "stop_column": 44,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `PathLike` has no attribute `input_size`\nObject of class `Path` has no attribute `input_size`\nObject of class `str` has no attribute `input_size`",
-      "concise_description": "Object of class `PathLike` has no attribute `input_size`\nObject of class `Path` has no attribute `input_size`\nObject of class `str` has no attribute `input_size`",
-      "severity": "error"
-    },
-    {
-      "line": 220,
-      "column": 16,
-      "stop_line": 220,
-      "stop_column": 46,
-      "path": "tests/unit/tools/test_converter.py",
-      "code": -2,
-      "name": "missing-attribute",
-      "description": "Object of class `OVModel` has no attribute `data_input_params`\nObject of class `PathLike` has no attribute `data_input_params`\nObject of class `Path` has no attribute `data_input_params`\nObject of class `str` has no attribute `data_input_params`",
-      "concise_description": "Object of class `OVModel` has no attribute `data_input_params`\nObject of class `PathLike` has no attribute `data_input_params`\nObject of class `Path` has no attribute `data_input_params`\nObject of class `str` has no attribute `data_input_params`",
-      "severity": "error"
-    },
-    {
-      "line": 48,
-      "column": 24,
-      "stop_line": 48,
-      "stop_column": 43,
-      "path": "tests/unit/types/test_label.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Literal[datumaro.components.annotation.GroupType.EXCLUSIVE]` is not assignable to parameter `group_type` with type `datumaro.experimental.categories.GroupType` in function `datumaro.experimental.categories.LabelGroup.__init__`",
-      "concise_description": "Argument `Literal[datumaro.components.annotation.GroupType.EXCLUSIVE]` is not assignable to parameter `group_type` with type `datumaro.experimental.categories.GroupType` in function `datumaro.experimental.categories.LabelGroup.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 53,
-      "column": 24,
-      "stop_line": 53,
-      "stop_column": 43,
-      "path": "tests/unit/types/test_label.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Literal[datumaro.components.annotation.GroupType.EXCLUSIVE]` is not assignable to parameter `group_type` with type `datumaro.experimental.categories.GroupType` in function `datumaro.experimental.categories.LabelGroup.__init__`",
-      "concise_description": "Argument `Literal[datumaro.components.annotation.GroupType.EXCLUSIVE]` is not assignable to parameter `group_type` with type `datumaro.experimental.categories.GroupType` in function `datumaro.experimental.categories.LabelGroup.__init__`",
-      "severity": "error"
-    },
-    {
-      "line": 55,
-      "column": 70,
-      "stop_line": 55,
-      "stop_column": 90,
-      "path": "tests/unit/types/test_label.py",
-      "code": -2,
-      "name": "bad-argument-type",
-      "description": "Argument `Literal[datumaro.components.annotation.GroupType.RESTRICTED]` is not assignable to parameter `group_type` with type `datumaro.experimental.categories.GroupType` in function `datumaro.experimental.categories.LabelGroup.__init__`",
-      "concise_description": "Argument `Literal[datumaro.components.annotation.GroupType.RESTRICTED]` is not assignable to parameter `group_type` with type `datumaro.experimental.categories.GroupType` in function `datumaro.experimental.categories.LabelGroup.__init__`",
-      "severity": "error"
-    },
     {
       "line": 36,
       "column": 9,
@@ -25717,4 +21949,4 @@
       "severity": "error"
     }
   ]
-}
+}
\ No newline at end of file
diff --git a/library/src/otx/backend/native/callbacks/__init__.py b/library/src/otx/backend/native/callbacks/__init__.py
index ba59945f67b..81cf38b2c67 100644
--- a/library/src/otx/backend/native/callbacks/__init__.py
+++ b/library/src/otx/backend/native/callbacks/__init__.py
@@ -4,6 +4,7 @@
 """Module for OTX custom callbacks."""
 
 from .batchsize_finder import BatchSizeFinder
+from .gpu_augmentation import GPUAugmentationCallback
 from .lr_monitor import SimpleLearningRateMonitor
 
-__all__ = ["BatchSizeFinder", "SimpleLearningRateMonitor"]
+__all__ = ["BatchSizeFinder", "GPUAugmentationCallback", "SimpleLearningRateMonitor"]
diff --git a/library/src/otx/backend/native/callbacks/aug_scheduler.py b/library/src/otx/backend/native/callbacks/aug_scheduler.py
index 38089e8eb98..e6c7a2bb183 100644
--- a/library/src/otx/backend/native/callbacks/aug_scheduler.py
+++ b/library/src/otx/backend/native/callbacks/aug_scheduler.py
@@ -1,108 +1,111 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-"""Data augmentation scheduler for training."""
+"""Data augmentation scheduler for training.
+
+Supports the CPU/GPU augmentation pipeline architecture:
+- CPU augmentations (torchvision): run in Dataset workers before collate
+- GPU augmentations (Kornia): run after batch transfer via GPUAugmentationCallback
+
+Each policy defines ``augmentations_cpu`` and optionally ``augmentations_gpu``.
+The scheduler swaps the CPU pipeline on the dataset at each ``__getitem__`` call
+and swaps the GPU pipeline on the GPUAugmentationCallback at each epoch boundary.
+"""
 
 from __future__ import annotations
 
+import logging
 import secrets
 from typing import TYPE_CHECKING, Any
 
 from lightning.pytorch.callbacks.callback import Callback
 
 from otx.config.data import SubsetConfig
-from otx.data.transform_libs.torchvision import Compose, TorchVisionTransformLib
+from otx.data.augmentation import CPUAugmentationPipeline, GPUAugmentationPipeline
 
 if TYPE_CHECKING:
     from multiprocessing import Value
 
     from lightning.pytorch import LightningModule, Trainer
 
+    from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
 
-class DataAugSwitch:
-    """Data augmentation switch for dynamic scheduling of augmentation policies during training.
+logger = logging.getLogger(__name__)
 
-    This class manages multiple data augmentation policies and switches between them
-    based on the current training epoch. It is designed to support multi-stage augmentation
-    strategies, such as starting with no augmentation, then applying strong augmentations,
-    and finally switching to lighter augmentations as training progresses.
 
-    The switch is typically used in conjunction with a callback (e.g., AugmentationSchedulerCallback)
-    that updates the current epoch, allowing the augmentation policy to change automatically
-    as training advances.
+class DataAugSwitch:
+    """Dynamic augmentation policy switch for the CPU/GPU pipeline.
 
-    Args:
-        policy_epochs (list[int]): List of 3 epoch indices that define the boundaries between
-            augmentation stages. For example, [4, 29, 50] means:
-                - Epochs < 4: use "no_aug"
-                - 4 <= epochs < 29: use "strong_aug_1" or "strong_aug_2" (randomly chosen)
-                - epochs >= 29: use "light_aug"
-        policies (dict[str, dict[str, Any]]): Dictionary mapping policy names to their configuration.
-            Each configuration should include a "transforms" key (list of transform configs),
-            and optionally "to_tv_image" (bool).
-
-    Attributes:
-        policy_epochs (list[int]): The epoch boundaries for switching policies.
-        policies (dict[str, dict[str, Any]]): The processed policy configurations.
-        _shared_epoch: A multiprocessing.Value or similar object for sharing the current epoch.
-            This attribute holds a reference to a multiprocessing.Value used to synchronize
-            and share the current training epoch across multiple processes.
-            This is necessary in distributed or multi-process training scenarios, where each process may
-            need to access or update the current epoch in a thread-safe and consistent manner.
-            By using a shared object, the augmentation policy can be switched reliably based on the global
-            training progress, ensuring all processes use the correct augmentation strategy.
-
-    Example:
-        >>> policy_epochs = [4, 29, 50]
-        >>> policies = {
-        ...     "no_aug": {"transforms": [...]},
-        ...     "strong_aug_1": {"transforms": [...]},
-        ...     "strong_aug_2": {"transforms": [...]},
-        ...     "light_aug": {"transforms": [...]},
-        ... }
-        >>> switch = DataAugSwitch(policy_epochs, policies)
-        >>> switch.set_shared_epoch(shared_epoch)
-        >>> # During training, update epoch:
-        >>> switch.epoch = 10
-        >>> to_tv_image, transforms = switch.current_transforms
-
-    Note:
-        - The current policy is determined by the current epoch and the provided policy_epochs.
-        - For the "strong augmentation" stage, one of the strong policies is randomly selected
-          for each call using a cryptographically secure random choice.
-        - The transforms for each policy are generated using TorchVisionTransformLib.
+    Manages multiple augmentation policies and switches between them based on the
+    current training epoch. Each policy contains separate CPU and GPU augmentation
+    lists that are built into ``CPUAugmentationPipeline`` and ``GPUAugmentationPipeline``
+    respectively.
 
+    Args:
+        policy_epochs (list[int]): List of 2 epoch boundaries ``[p0, p1]``:
+            - ``epoch < p0``: ``no_aug``
+            - ``p0 <= epoch < p1``: ``strong_aug_1`` or ``strong_aug_2`` (random)
+            - ``epoch >= p1``: ``light_aug``
+        policies (dict[str, dict[str, Any]]): Policy name → config mapping.
+            Each config must have ``augmentations_cpu`` (list of transform dicts).
+            Optionally ``augmentations_gpu`` (list of transform dicts).
+        input_size (tuple[int,int] | list[int] | None): Model input size for
+            ``$(input_size)`` placeholder resolution.
+
+    Example::
+
+        policies = {
+            "no_aug": {
+                "augmentations_cpu": [
+                    {"class_path": "otx.data.augmentation.transforms.Resize", ...},
+                ],
+                "augmentations_gpu": [
+                    {"class_path": "kornia.augmentation.Normalize", ...},
+                ],
+            },
+            "strong_aug_1": { ... },
+            "strong_aug_2": { ... },
+            "light_aug": { ... },
+        }
+        switch = DataAugSwitch([4, 29], policies, input_size=[640, 640])
     """
 
     def __init__(
         self,
         policy_epochs: list[int],
         policies: dict[str, dict[str, Any]],
+        input_size: tuple[int, int] | list[int] | None = None,
     ) -> None:
-        """Initialize the data augmentation switch."""
-        if len(policy_epochs) != 3:
-            msg = "Expected 3 policy epochs for 4-stage scheduler (e.g., [4, 29, 50])"
+        if len(policy_epochs) != 2:
+            msg = "Expected 2 policy epochs for 3-stage scheduler (e.g., [4, 29])"
             raise ValueError(msg)
 
         self.policy_epochs = policy_epochs
-        self.policies = policies
         self._shared_epoch = None
+        self.input_size = tuple(input_size) if input_size is not None else None
+        self._gpu_pipeline_cache: dict[tuple[str, tuple[str, ...] | None], GPUAugmentationPipeline] = {}
 
-        # Compose transforms for each policy
+        # Build pipelines for each policy
+        self.policies: dict[str, dict[str, Any]] = {}
         for name, config in policies.items():
+            cpu_aug_configs = config.get("augmentations_cpu", [])
+            gpu_aug_configs = config.get("augmentations_gpu", [])
+
+            # Build CPU pipeline via SubsetConfig → CPUAugmentationPipeline
+            cpu_subset = SubsetConfig(
+                augmentations_cpu=cpu_aug_configs,
+                subset_name=name,
+                input_size=self.input_size,  # type: ignore[arg-type]
+            )
+            cpu_pipeline = CPUAugmentationPipeline.from_config(cpu_subset)
+
             self.policies[name] = {
-                "to_tv_image": config.get("to_tv_image", True),
-                "transforms": TorchVisionTransformLib.generate(
-                    config=SubsetConfig(
-                        transforms=config["transforms"],
-                        batch_size=1,
-                        subset_name=name,
-                    ),
-                ),
+                "cpu_pipeline": cpu_pipeline,
+                "gpu_aug_configs": gpu_aug_configs,
             }
 
     def set_shared_epoch(self, shared_epoch: Value) -> None:  # type: ignore[valid-type]
-        """Set the shared epoch."""
+        """Set the shared multiprocessing epoch value."""
         self._shared_epoch = shared_epoch
 
     @property
@@ -123,73 +126,142 @@ def epoch(self, value: int) -> None:
 
     @property
     def current_policy_name(self) -> str:
-        """Get the current policy name."""
+        """Get the current policy name based on epoch.
+
+        During the strong augmentation phase (p0 <= epoch < p1), randomly
+        selects between ``strong_aug_1`` and ``strong_aug_2`` so that each
+        dataset worker can get a different variant per sample.
+        """
         e = self.epoch
-        p0, p1, _ = self.policy_epochs
+        p0, p1 = self.policy_epochs
         if e < p0:
             return "no_aug"
         if p0 <= e < p1:
-            # Use secrets.choice for cryptographically secure random selection
             return secrets.choice(["strong_aug_1", "strong_aug_2"])
         return "light_aug"
 
     @property
-    def current_transforms(self) -> tuple[bool, Compose]:
-        """Get the current transforms."""
-        name = self.current_policy_name
-        policy = self.policies.get(name)
-        return policy["to_tv_image"], policy["transforms"]  # type: ignore[index]
+    def current_gpu_policy_name(self) -> str:
+        """Get deterministic policy name used for GPU pipeline selection."""
+        e = self.epoch
+        p0, p1 = self.policy_epochs
+        if e < p0:
+            return "no_aug"
+        if p0 <= e < p1:
+            if "strong_aug_1" in self.policies:
+                return "strong_aug_1"
+            if "strong_aug_2" in self.policies:
+                return "strong_aug_2"
+            return "no_aug"
+        return "light_aug"
+
+    def get_cpu_pipeline(self, policy_name: str) -> CPUAugmentationPipeline:
+        """Get the CPU augmentation pipeline for a specific policy."""
+        return self.policies[policy_name]["cpu_pipeline"]
+
+    def get_gpu_aug_configs(self, policy_name: str) -> list[dict[str, Any]]:
+        """Get raw GPU augmentation configs for a given policy.
+
+        Returns empty list if no GPU augmentations are defined for that policy.
+        """
+        return self.policies[policy_name].get("gpu_aug_configs", [])
+
+    def build_gpu_pipeline(self, policy_name: str, data_keys: list[str] | None = None) -> GPUAugmentationPipeline:
+        """Build a GPUAugmentationPipeline for the given policy.
+
+        Args:
+            policy_name: Name of the policy.
+            data_keys: Kornia data_keys for AugmentationSequential.
+
+        Returns:
+            GPUAugmentationPipeline instance.
+        """
+        cache_key = (policy_name, tuple(data_keys) if data_keys else None)
+        if cache_key in self._gpu_pipeline_cache:
+            return self._gpu_pipeline_cache[cache_key]
+
+        gpu_configs = self.get_gpu_aug_configs(policy_name)
+        if not gpu_configs:
+            pipeline = GPUAugmentationPipeline([], data_keys=data_keys)
+            self._gpu_pipeline_cache[cache_key] = pipeline
+            return pipeline
+
+        gpu_subset = SubsetConfig(
+            augmentations_gpu=gpu_configs,
+            batch_size=1,
+            subset_name=policy_name,
+            input_size=self.input_size,  # type: ignore[arg-type]
+        )
+        pipeline = GPUAugmentationPipeline.from_config(gpu_subset, data_keys=data_keys)
+        self._gpu_pipeline_cache[cache_key] = pipeline
+        return pipeline
 
 
 class AugmentationSchedulerCallback(Callback):
-    """Callback for managing data augmentation scheduling during training.
+    """Callback that drives augmentation policy switching at epoch boundaries.
 
-    This callback is designed to work with a `DataAugSwitch` object, which controls
-    the augmentation policy applied to the training data at each epoch. The callback
-    updates the current epoch in the `DataAugSwitch` at the start of each training epoch,
-    allowing the augmentation policy to change dynamically as training progresses.
+    At each epoch start:
+    1. Updates the shared epoch counter so CPU workers pick the correct policy.
+    2. If the deterministic GPU policy changed since the previous epoch, swaps the
+       GPU pipeline on the ``GPUAugmentationCallback`` to the one for that policy.
 
-    Typical usage involves attaching this callback to a PyTorch Lightning Trainer,
-    and providing it with a `DataAugSwitch` instance that manages the augmentation logic.
+    The CPU pipeline swap happens lazily in each dataset worker via
+    ``DataAugSwitchMixin._apply_augmentation_switch()`` which calls
+    ``DataAugSwitch.get_cpu_pipeline()``.
 
     Args:
-        data_aug_switch (DataAugSwitch | None): Optional. The DataAugSwitch instance
-            that controls augmentation policies. Can be set later via `set_data_aug_switch()`.
-
-    Example:
-        >>> data_aug_switch = DataAugSwitch(...)
-        >>> aug_callback = AugmentationSchedulerCallback(data_aug_switch)
-        >>> trainer = Trainer(callbacks=[aug_callback])
-        >>> trainer.fit(model, datamodule=...)
-
-        # Alternatively, set the DataAugSwitch after instantiation:
-        >>> aug_callback = AugmentationSchedulerCallback()
-        >>> aug_callback.set_data_aug_switch(data_aug_switch)
-        >>> trainer = Trainer(callbacks=[aug_callback])
-
-    Note:
-        - The `DataAugSwitch` instance must be set before training starts.
-        - This callback assumes that the `DataAugSwitch` object has an `epoch` property
-          that can be updated to reflect the current training epoch.
+        data_aug_switch: The DataAugSwitch that manages policies.
     """
 
     def __init__(self, data_aug_switch: DataAugSwitch | None = None):
         super().__init__()
         self.data_aug_switch = data_aug_switch
+        self._gpu_aug_callback: GPUAugmentationCallback | None = None
+        self._last_gpu_policy: str | None = None
 
-    def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
-        """Update the DataAugSwitch with the current epoch at the start of each training epoch.
+    def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> None:
+        """Find and cache reference to GPUAugmentationCallback for GPU pipeline swaps."""
+        from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
 
-        Args:
-            trainer (Trainer): The PyTorch Lightning Trainer instance.
-            pl_module (LightningModule): The LightningModule being trained.
-        """
-        self.data_aug_switch.epoch = trainer.current_epoch  # type: ignore[union-attr]
+        for callback in trainer.callbacks:  # type: ignore[union-attr]
+            if isinstance(callback, GPUAugmentationCallback):
+                self._gpu_aug_callback = callback
+                break
 
-    def set_data_aug_switch(self, data_aug_switch: DataAugSwitch) -> None:
-        """Set or update the DataAugSwitch instance for this callback.
+    def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
+        """Update epoch and swap GPU pipeline if the phase changed."""
+        if self.data_aug_switch is None:
+            return
+
+        self.data_aug_switch.epoch = trainer.current_epoch
+
+        # Swap GPU pipeline if deterministic GPU phase changed.
+        gpu_policy = self.data_aug_switch.current_gpu_policy_name
+        if gpu_policy != self._last_gpu_policy:
+            self._swap_gpu_pipeline(gpu_policy, pl_module)
+            self._last_gpu_policy = gpu_policy
+
+    def _swap_gpu_pipeline(self, policy_name: str, pl_module: LightningModule) -> None:
+        """Rebuild and assign the GPU pipeline for the new policy."""
+        if self._gpu_aug_callback is None or self.data_aug_switch is None:
+            return
+
+        # Get data_keys from the existing pipeline before overwriting
+        data_keys = None
+        if self._gpu_aug_callback._train_pipeline is not None:  # noqa: SLF001
+            data_keys = self._gpu_aug_callback._train_pipeline.data_keys  # noqa: SLF001
+
+        new_pipeline = self.data_aug_switch.build_gpu_pipeline(policy_name, data_keys=data_keys)
+        # Move to same device as model
+        new_pipeline = new_pipeline.to(pl_module.device)
+        # Assign the new pipeline to the GPUAugmentationCallback
+        self._gpu_aug_callback._train_pipeline = new_pipeline  # noqa: SLF001
+        # Log the change. If the new pipeline has no augmentations, it will have aug_sequential = None.
+        if new_pipeline.aug_sequential is not None:
+            logger.info(f"Swapped GPU augmentation pipeline to policy '{policy_name}'")
+        else:
+            logger.info(f"No GPU augmentations for policy '{policy_name}'; cleared GPU pipeline")
 
-        Args:
-            data_aug_switch (DataAugSwitch): The DataAugSwitch to use.
-        """
+    def set_data_aug_switch(self, data_aug_switch: DataAugSwitch) -> None:
+        """Set or update the DataAugSwitch instance."""
         self.data_aug_switch = data_aug_switch
diff --git a/library/src/otx/backend/native/callbacks/gpu_augmentation.py b/library/src/otx/backend/native/callbacks/gpu_augmentation.py
new file mode 100644
index 00000000000..a14036267a8
--- /dev/null
+++ b/library/src/otx/backend/native/callbacks/gpu_augmentation.py
@@ -0,0 +1,250 @@
+# Copyright (C) 2024-2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""GPU Augmentation Callback for applying Kornia augmentations on GPU."""
+
+from __future__ import annotations
+
+import logging as log
+import typing
+from typing import TYPE_CHECKING, ClassVar
+
+import torch
+from lightning import Callback
+from torchvision import tv_tensors
+
+from otx.data.augmentation import GPUAugmentationPipeline
+from otx.data.entity.sample import OTXSampleBatch
+from otx.types.task import OTXTaskType
+
+if TYPE_CHECKING:
+    from lightning import LightningModule, Trainer
+
+    from otx.config.data import SubsetConfig
+
+
+class GPUAugmentationCallback(Callback):
+    """Callback to apply GPU augmentations using Kornia.
+
+    This callback applies GPU-accelerated augmentations from the GPUAugmentationPipeline
+    to batches during training and optionally during validation/testing.
+
+    Key features:
+    - Applies augmentations after batch is transferred to GPU
+    - Extracts normalization parameters and updates model's data_input_params
+    - Supports separate train/val/test pipelines with different augmentation configs
+    - Automatically handles bboxes, masks, keypoints based on batch content
+
+    Args:
+        train_config: SubsetConfig for training augmentations (optional; no train pipeline if None).
+        val_config: SubsetConfig for validation augmentations (optional).
+        test_config: SubsetConfig for test augmentations (optional, defaults to val_config).
+
+    Example:
+        >>> callback = GPUAugmentationCallback(
+        ...     train_config=train_subset_config,
+        ...     val_config=val_subset_config,
+        ... )
+        >>> trainer = Trainer(callbacks=[callback])
+    """
+
+    # Data keys for each task type. Masks for instance segmentation are handled
+    # with special preprocessing (add channel dim) in GPUAugmentationPipeline.forward().
+    _DATA_KEYS_BY_TASK: ClassVar[dict[OTXTaskType, tuple[str, ...]]] = {
+        OTXTaskType.MULTI_CLASS_CLS: ("label",),
+        OTXTaskType.MULTI_LABEL_CLS: ("label",),
+        OTXTaskType.H_LABEL_CLS: ("label",),
+        OTXTaskType.DETECTION: ("bbox_xyxy", "label"),
+        OTXTaskType.INSTANCE_SEGMENTATION: ("bbox_xyxy", "mask", "label"),
+        OTXTaskType.KEYPOINT_DETECTION: ("keypoints", "label"),
+        OTXTaskType.SEMANTIC_SEGMENTATION: ("mask",),
+    }
+
+    def __init__(
+        self,
+        train_config: SubsetConfig | None = None,
+        val_config: SubsetConfig | None = None,
+        test_config: SubsetConfig | None = None,
+    ) -> None:
+        super().__init__()
+        self.train_config = train_config
+        self.val_config = val_config
+        self.test_config = test_config if test_config is not None else val_config
+
+        self._train_pipeline: GPUAugmentationPipeline | None = None
+        self._val_pipeline: GPUAugmentationPipeline | None = None
+        self._test_pipeline: GPUAugmentationPipeline | None = None
+
+    def setup(self, trainer: Trainer, pl_module: LightningModule, stage: str) -> None:
+        """Build the per-subset GPU pipelines and sync normalization to the model.
+
+        A pipeline is (re)built on every call for each subset whose config is not None.
+        """
+        data_keys = ["input", *self._DATA_KEYS_BY_TASK.get(pl_module.task, [])]  # type: ignore[arg-type]
+        if self.train_config is not None:
+            self._train_pipeline = GPUAugmentationPipeline.from_config(self.train_config, data_keys=data_keys)
+            log.info(f"GPU train augmentation pipeline:\n{self._train_pipeline}")
+
+        if self.val_config is not None:
+            self._val_pipeline = GPUAugmentationPipeline.from_config(self.val_config, data_keys=data_keys)
+            log.info(f"GPU val augmentation pipeline:\n{self._val_pipeline}")
+
+        if self.test_config is not None:
+            self._test_pipeline = GPUAugmentationPipeline.from_config(self.test_config, data_keys=data_keys)
+            log.info(f"GPU test augmentation pipeline:\n{self._test_pipeline}")
+
+        # Update model's normalization params from GPU pipeline
+        self._update_model_normalization(pl_module)
+
+    def _update_model_normalization(self, pl_module: LightningModule) -> None:
+        """Update model's data_input_params with normalization from GPU pipeline.
+
+        If normalization is in the GPU pipeline, we need to update the model's
+        mean/std so that export and inference use the correct values.
+        If both model and pipeline have None, set defaults (0,0,0) and (1,1,1).
+        """
+        # mean/std are used for model export, so the test pipeline takes priority;
+        # the train pipeline is used only when no test pipeline was built.
+        pipeline = self._test_pipeline if self._test_pipeline is not None else self._train_pipeline
+
+        pipeline_mean = pipeline.mean if pipeline is not None else None
+        pipeline_std = pipeline.std if pipeline is not None else None
+        model_mean = getattr(pl_module.data_input_params, "mean", None)  # type: ignore[union-attr]
+        model_std = getattr(pl_module.data_input_params, "std", None)  # type: ignore[union-attr]
+
+        # Precedence: pipeline > model > default (`or` skips None and any other falsy value)
+        data_input_params = pl_module.data_input_params  # type: ignore[union-attr]
+        data_input_params.mean = pipeline_mean or model_mean or (0, 0, 0)  # type: ignore[union-attr]
+        data_input_params.std = pipeline_std or model_std or (1, 1, 1)  # type: ignore[union-attr]
+
+        # Log only when the effective mean/std differs from what the model had before
+        if model_mean != data_input_params.mean or model_std != data_input_params.std:
+            log.info(f"Updated model mean: {model_mean} -> {data_input_params.mean}")
+            log.info(f"Updated model std: {model_std} -> {data_input_params.std}")
+
+    def _apply_pipeline(
+        self,
+        pipeline: GPUAugmentationPipeline,
+        batch: OTXSampleBatch,
+    ) -> None:
+        """Apply GPU augmentation pipeline to batch (in-place).
+
+        Automatically determines which data to transform based on batch content.
+        Kornia decides whether to modify labels based on the augmentations used.
+
+        Args:
+            pipeline: GPUAugmentationPipeline to apply.
+            batch: OTXSampleBatch to transform.
+        """
+        # Move pipeline to same device as batch (skipped when images has no ``device`` attribute)
+        device = batch.images.device if hasattr(batch.images, "device") else None
+        if device is not None:
+            pipeline = pipeline.to(device)
+
+        keypoints_xy: list[torch.Tensor] | None = None
+        keypoints_visibility: list[torch.Tensor | None] | None = None
+        if batch.keypoints is not None:
+            keypoints_xy = []
+            keypoints_visibility = []
+            _images = typing.cast("torch.Tensor", batch.images)
+            for kps in batch.keypoints:
+                if kps is None:  # sample without keypoints: empty placeholder, restored to None below
+                    keypoints_xy.append(torch.empty((0, 2), device=_images.device, dtype=_images.dtype))
+                    keypoints_visibility.append(None)
+                    continue
+                keypoints_xy.append(kps[:, :2])  # first two columns: coordinates
+                if kps.shape[-1] >= 3:
+                    keypoints_visibility.append(kps[:, 2])  # third column: visibility
+                else:  # no visibility column: treat every keypoint as visible
+                    keypoints_visibility.append(torch.ones(kps.shape[0], device=kps.device, dtype=kps.dtype))
+
+        # Apply pipeline - returns dict with augmented data
+        # Labels are included in data_keys, so Kornia will process them if applicable
+        result = pipeline(
+            batch.images,
+            labels=batch.labels,
+            bboxes=batch.bboxes,
+            masks=batch.masks,
+            keypoints=keypoints_xy,
+        )
+
+        # Update batch in-place with augmented data
+        batch.images = result["images"]
+        if result.get("labels") is not None:
+            batch.labels = result["labels"]
+        if result.get("bboxes") is not None and batch.bboxes is not None:
+            # Kornia may return plain tensors, wrap them back to BoundingBoxes
+            # Use original canvas_size from batch.bboxes since Kornia does not modify the shape.
+            batch.bboxes = [
+                tv_tensors.BoundingBoxes(  # type: ignore[no-matching-overload]
+                    b,
+                    format=tv_tensors.BoundingBoxFormat.XYXY,
+                    canvas_size=batch.bboxes[i].canvas_size,
+                )
+                if not isinstance(b, tv_tensors.BoundingBoxes)
+                else b
+                for i, b in enumerate(result["bboxes"])
+            ]
+        if result.get("masks") is not None:
+            # Kornia may return plain tensors, wrap them back to Mask
+            batch.masks = [tv_tensors.Mask(m) if not isinstance(m, tv_tensors.Mask) else m for m in result["masks"]]
+        if result.get("keypoints") is not None:
+            if keypoints_visibility is None:
+                batch.keypoints = result["keypoints"]
+            else:
+                # update keypoints visibility based on whether they are in bounds after augmentation
+                height, width = batch.images.shape[-2], batch.images.shape[-1]
+                restored_keypoints: list[torch.Tensor | None] = []
+                for aug_xy, vis in zip(result["keypoints"], keypoints_visibility):
+                    if vis is None:  # placeholder sample: keep its keypoints as None
+                        restored_keypoints.append(None)
+                        continue
+
+                    in_bounds = (
+                        (aug_xy[:, 0] >= 0) & (aug_xy[:, 0] < width) & (aug_xy[:, 1] >= 0) & (aug_xy[:, 1] < height)
+                    )
+                    updated_vis = vis.to(dtype=aug_xy.dtype) * in_bounds.to(dtype=aug_xy.dtype)
+                    restored_keypoints.append(torch.cat([aug_xy, updated_vis.unsqueeze(-1)], dim=-1))
+
+                batch.keypoints = typing.cast("list[torch.Tensor] | None", restored_keypoints)
+
+    def on_train_batch_start(
+        self,
+        trainer: Trainer,
+        pl_module: LightningModule,
+        batch: OTXSampleBatch,
+        batch_idx: int,
+    ) -> None:
+        """Apply the train GPU pipeline to the batch in-place; no-op if none was built."""
+        if self._train_pipeline is None:
+            return
+
+        self._apply_pipeline(self._train_pipeline, batch)
+
+    def on_validation_batch_start(
+        self,
+        trainer: Trainer,
+        pl_module: LightningModule,
+        batch: OTXSampleBatch,
+        batch_idx: int,
+        dataloader_idx: int = 0,
+    ) -> None:
+        """Apply the validation GPU pipeline to the batch in-place; no-op if none was built."""
+        if self._val_pipeline is None:
+            return
+
+        self._apply_pipeline(self._val_pipeline, batch)
+
+    def on_test_batch_start(
+        self,
+        trainer: Trainer,
+        pl_module: LightningModule,
+        batch: OTXSampleBatch,
+        batch_idx: int,
+        dataloader_idx: int = 0,
+    ) -> None:
+        """Apply the test GPU pipeline to the batch in-place; no-op if none was built."""
+        if self._test_pipeline is None:
+            return
+
+        self._apply_pipeline(self._test_pipeline, batch)
diff --git a/library/src/otx/backend/native/engine.py b/library/src/otx/backend/native/engine.py
index 95e92903f27..90ba7cb1147 100644
--- a/library/src/otx/backend/native/engine.py
+++ b/library/src/otx/backend/native/engine.py
@@ -9,10 +9,10 @@
 import csv
 import inspect
 import logging
+import multiprocessing
 import os
 import time
 from contextlib import contextmanager
-from multiprocessing import Value
 from pathlib import Path
 from pickle import UnpicklingError  # nosec B403: UnpicklingError is used only for exception handling
 from typing import TYPE_CHECKING, Any, Callable, ClassVar, Iterable, Iterator, Literal
@@ -26,10 +26,11 @@
 
 from otx.backend.native.callbacks.adaptive_train_scheduling import AdaptiveTrainScheduling
 from otx.backend.native.callbacks.aug_scheduler import AugmentationSchedulerCallback
+from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
 from otx.backend.native.callbacks.gpu_mem_monitor import GPUMemMonitor
 from otx.backend.native.callbacks.iteration_timer import IterationTimer
 from otx.backend.native.callbacks.lr_monitor import SimpleLearningRateMonitor
-from otx.backend.native.models.base import DataInputParams, OTXModel
+from otx.backend.native.models.base import OTXModel
 from otx.backend.native.tools import adapt_batch_size
 from otx.backend.native.utils.cache import TrainerArgumentsCache
 from otx.config.device import DeviceConfig
@@ -139,11 +140,15 @@ def __init__(
                     "Input size is not specified in the datamodule. Ensure that the datamodule has a valid input size."
                 )
                 raise ValueError(msg)
-            get_model_args["data_input_params"] = DataInputParams(
-                input_size=input_size,
-                mean=self._datamodule.input_mean,
-                std=self._datamodule.input_std,
-            )
+            # Only pass what the datamodule knows; mean/std may be None when
+            # normalization lives in the augmentation pipeline. Model defaults
+            # fill in any missing values inside _configure_preprocessing_params.
+            params: dict[str, Any] = {"input_size": input_size}
+            if self._datamodule.input_mean is not None:
+                params["mean"] = self._datamodule.input_mean
+            if self._datamodule.input_std is not None:
+                params["std"] = self._datamodule.input_std
+            get_model_args["data_input_params"] = params
 
             model = self._auto_configurator.get_model(**get_model_args)
 
@@ -986,11 +991,44 @@ def configure_callbacks(self) -> None:
         if not has_callback(GPUMemMonitor):
             callbacks.append(GPUMemMonitor())
 
+        # Add GPU augmentation callback if GPU augmentations are configured
+        if not has_callback(GPUAugmentationCallback):
+            gpu_aug_callback = self._build_gpu_augmentation_callback()
+            if gpu_aug_callback is not None:
+                callbacks.append(gpu_aug_callback)
+
         self._cache.args["callbacks"] = callbacks + config_callbacks
 
         # Setup DataAugSwitch with shared multiprocessing.Value
         self._setup_augmentation_scheduler()
 
+    def _build_gpu_augmentation_callback(self) -> GPUAugmentationCallback | None:
+        """Build GPU augmentation callback from datamodule configs.
+
+        Returns:
+            GPUAugmentationCallback if any subset (train/val/test) configures GPU augmentations, else None.
+        """
+        train_config = self._datamodule.train_subset
+        val_config = self._datamodule.val_subset
+        test_config = self._datamodule.test_subset
+
+        # A subset counts only if it exists and has a non-empty ``augmentations_gpu``
+        has_train_gpu_augs = (
+            train_config and hasattr(train_config, "augmentations_gpu") and train_config.augmentations_gpu
+        )
+        has_val_gpu_augs = val_config and hasattr(val_config, "augmentations_gpu") and val_config.augmentations_gpu
+        has_test_gpu_augs = test_config and hasattr(test_config, "augmentations_gpu") and test_config.augmentations_gpu
+
+        if not (has_train_gpu_augs or has_val_gpu_augs or has_test_gpu_augs):  # test-only setups count too
+            return None
+
+        logging.info("Building GPU augmentation callback with Kornia augmentations")
+        return GPUAugmentationCallback(
+            train_config=train_config if has_train_gpu_augs else None,
+            val_config=val_config if has_val_gpu_augs else None,
+            test_config=test_config if has_test_gpu_augs else None,
+        )
+
     def _setup_augmentation_scheduler(self) -> None:
         """Set up shared memory for DataAugSwitch and AugmentationSchedulerCallback.
 
@@ -1040,8 +1078,10 @@ def _setup_augmentation_scheduler(self) -> None:
 
         # If AugmentationSchedulerCallback exists and has a data_aug_switch, set up shared memory
         if aug_scheduler_callback is not None and aug_scheduler_callback.data_aug_switch is not None:
-            # Create shared multiprocessing.Value for epoch tracking
-            shared_epoch = Value("i", 0)
+            # Create shared multiprocessing.Value for epoch tracking.
+            # Must use "spawn" context to match the DataLoader's multiprocessing_context,
+            # otherwise the SemLock created in a fork context cannot be shared with spawn workers.
+            shared_epoch = multiprocessing.get_context("spawn").Value("i", 0)
             aug_scheduler_callback.data_aug_switch.set_shared_epoch(shared_epoch)
 
     def _setup_data_aug_switch_for_datasets(self) -> None:
diff --git a/library/src/otx/backend/native/exporter/base.py b/library/src/otx/backend/native/exporter/base.py
index 7a36019ebf9..656658eba4f 100644
--- a/library/src/otx/backend/native/exporter/base.py
+++ b/library/src/otx/backend/native/exporter/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Class definition for base model exporter used in OTX."""
@@ -171,8 +171,8 @@ def _extend_model_metadata(self, metadata: dict[tuple[str, str], str]) -> dict[t
         Returns:
             dict[tuple[str, str] ,str]: updated metadata
         """
-        mean_str = " ".join(map(str, self.data_input_params.mean))
-        std_str = " ".join(map(str, self.data_input_params.std))
+        mean_str = " ".join(map(str, self.data_input_params.mean)) if self.data_input_params.mean else ""
+        std_str = " ".join(map(str, self.data_input_params.std)) if self.data_input_params.std else ""
 
         extra_data = {
             ("model_info", "mean_values"): mean_str.strip(),
diff --git a/library/src/otx/backend/native/models/base.py b/library/src/otx/backend/native/models/base.py
index 89472585366..fc445154cbd 100644
--- a/library/src/otx/backend/native/models/base.py
+++ b/library/src/otx/backend/native/models/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2025 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Class definition for base model entity used in OTX."""
@@ -75,7 +75,11 @@ def as_dict(self) -> dict[str, Any]:
 
     def as_ncwh(self, batch_size: int = 1) -> tuple[int, int, int, int]:
         """Convert input_size to NCWH format."""
-        return (batch_size, 3, *self.input_size)
+        if self.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
+
+        return (batch_size, 3, *self.input_size)
 
 
 def _default_optimizer_callable(params: params_t) -> Optimizer:
@@ -133,7 +137,6 @@ def __init__(
         self,
         label_info: LabelInfoTypes | int | Sequence,
         data_input_params: DataInputParams | dict | None = None,
-        task: OTXTaskType | None = None,
         model_name: str = "OTXModel",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -159,29 +162,16 @@ def __init__(
             torch_compile (bool, optional): Flag to indicate if torch.compile should be used. Defaults to False.
             tile_config (TileConfig, optional): Configuration for tiling. Defaults to TileConfig(enable_tiler=False).
 
-        Returns:
-            None
         """
         super().__init__()
 
         self._label_info = self._dispatch_label_info(label_info)
         self.model_name = model_name
-        if isinstance(data_input_params, dict):
-            data_input_params = DataInputParams(**data_input_params)
-        elif data_input_params is None:
-            data_input_params = (
-                self._default_preprocessing_params[self.model_name]
-                if isinstance(self._default_preprocessing_params, dict)
-                else self._default_preprocessing_params
-            )
-        self._check_preprocessing_params(data_input_params)
-        self.data_input_params = data_input_params
+        self.data_input_params = self._configure_preprocessing_params(data_input_params)
         self.model = self._create_model()
         self.optimizer_callable = ensure_callable(optimizer)
         self.scheduler_callable = ensure_callable(scheduler)
         self.metric_callable = ensure_callable(metric)
-        self._task = task
-
         self.torch_compile = torch_compile
         self._explain_mode = False
 
@@ -189,6 +179,7 @@ def __init__(
         if isinstance(tile_config, dict):
             tile_config = TileConfig(**tile_config)
         self._tile_config = tile_config.clone()
+
         self.save_hyperparameters(
             logger=False,
             ignore=["optimizer", "scheduler", "metric", "label_info", "tile_config", "data_input_params"],
@@ -957,40 +948,71 @@ def _dispatch_label_info(label_info: LabelInfoTypes) -> LabelInfo:
 
         raise TypeError(label_info)
 
-    def _check_preprocessing_params(self, preprocessing_params: DataInputParams | None) -> None:
+    def _configure_preprocessing_params(
+        self,
+        preprocessing_params: DataInputParams | dict | None = None,
+    ) -> DataInputParams:
-        """Check the validity of the preprocessing parameters."""
+        """Resolve preprocessing params (dict values fall back to model defaults), validate, and return them."""
-        if preprocessing_params is None:
-            msg = "Data input parameters should not be None."
-            raise ValueError(msg)
+        default = (
+            self._default_preprocessing_params[self.model_name]
+            if isinstance(self._default_preprocessing_params, dict)
+            else self._default_preprocessing_params
+        )
+
+        if isinstance(preprocessing_params, dict):
+            # Merge with model defaults for any missing keys so callers can pass
+            # a partial dict (e.g. only input_size) without knowing mean/std upfront.
+            data_input_params = DataInputParams(
+                input_size=preprocessing_params.get("input_size") or default.input_size,
+                mean=preprocessing_params.get("mean") or default.mean,
+                std=preprocessing_params.get("std") or default.std,
+            )
+        elif isinstance(preprocessing_params, DataInputParams):
+            data_input_params = preprocessing_params
+        elif preprocessing_params is None:
+            data_input_params = default
+        else:
+            msg = (
+                f"preprocessing_params should be either dict or DataInputParams, "
+                f"but got {type(preprocessing_params)} instead."
+            )
+            raise TypeError(msg)
 
-        input_size = preprocessing_params.input_size
-        mean = preprocessing_params.mean
-        std = preprocessing_params.std
+        # Validate
+        if data_input_params.mean is None:
+            msg = "Mean must be provided (either explicitly or via model defaults)."
+            raise ValueError(msg)
+        if data_input_params.std is None:
+            msg = "Std must be provided (either explicitly or via model defaults)."
+            raise ValueError(msg)
 
-        if not (len(mean) == 3 and all(isinstance(m, float) for m in mean)):
-            msg = f"Mean should be a tuple of 3 float values, but got {mean} instead."
+        if not (len(data_input_params.mean) == 3 and all(isinstance(m, float) for m in data_input_params.mean)):
+            msg = f"Mean should be a tuple of 3 float values, but got {data_input_params.mean} instead."
             raise ValueError(msg)
-        if not (len(std) == 3 and all(isinstance(s, float) for s in std)):
-            msg = f"Std should be a tuple of 3 float values, but got {std} instead."
+        if not (len(data_input_params.std) == 3 and all(isinstance(s, float) for s in data_input_params.std)):
+            msg = f"Std should be a tuple of 3 float values, but got {data_input_params.std} instead."
             raise ValueError(msg)
 
-        if not all(0 <= m <= 255 for m in mean):
-            msg = f"Mean values should be in the range [0, 255], but got {mean} instead."
+        if not all(m >= 0 for m in data_input_params.mean):
+            msg = f"Mean values should be non-negative, but got {data_input_params.mean} instead."
             raise ValueError(msg)
-        if not all(0 <= s <= 255 for s in std):
-            msg = f"Std values should be in the range [0, 255], but got {std} instead."
+        if not all(s > 0 for s in data_input_params.std):
+            msg = f"Std values should be positive, but got {data_input_params.std} instead."
             raise ValueError(msg)
 
-        if input_size is not None and (
-            input_size[0] % self.input_size_multiplier != 0 or input_size[1] % self.input_size_multiplier != 0
+        if data_input_params.input_size is not None and (
+            data_input_params.input_size[0] % self.input_size_multiplier != 0
+            or data_input_params.input_size[1] % self.input_size_multiplier != 0
         ):
-            msg = f"Input size should be a multiple of {self.input_size_multiplier}, but got {input_size} instead."
+            msg = (
+                f"Input size should be a multiple of {self.input_size_multiplier}, "
+                f"but got {data_input_params.input_size} instead."
+            )
             raise ValueError(msg)
 
+        return data_input_params
+
     @property
+    @abstractmethod
     def task(self) -> OTXTaskType:
         """Get  task type."""
-        if self._task is None:
-            msg = "Task type is not set. Please set the task type before using this model."
-            raise ValueError(msg)
-        return self._task
diff --git a/library/src/otx/backend/native/models/classification/factory.py b/library/src/otx/backend/native/models/classification/factory.py
index 5e85f95dcb3..781bfa38e99 100644
--- a/library/src/otx/backend/native/models/classification/factory.py
+++ b/library/src/otx/backend/native/models/classification/factory.py
@@ -50,7 +50,7 @@ class MobileNetV3:
     def __new__(
         cls,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         task: Literal["multi_class", "multi_label", "h_label"] = "multi_class",
         freeze_backbone: bool = False,
         model_name: Literal["mobilenetv3_large", "mobilenetv3_small"] = "mobilenetv3_large",
@@ -69,8 +69,8 @@ def __new__(
 
         Args:
             label_info (LabelInfoTypes): The label information.
-            data_input_params (DataInputParams | dict): The data input parameters that consists
-                of input size, mean and std.
+            data_input_params (DataInputParams | dict | None, optional): The data input parameters that consists
+                of input size, mean and std. Defaults to None.
             freeze_backbone (bool, optional): Whether to freeze the backbone during training. Defaults to False.
                 Note: only multiclass classification supports this argument.
             model_name (str, optional): The model name. Defaults to "mobilenetv3_large".
@@ -88,8 +88,8 @@ def __new__(
         ...     task="multi_class",
         ...     label_info=10,
         ...     data_input_params={"input_size": (224, 224),
-        ...                        "mean": [123.675, 116.28, 103.53],
-        ...                        "std": [58.395, 57.12, 57.375]},
+        ...                        "mean": [0.485, 0.456, 0.406],
+        ...                        "std": [0.229, 0.224, 0.225]},
         ...     model_name="mobilenetv3_small",
         ... )
 
@@ -98,8 +98,8 @@ def __new__(
         ...     task="multi_label",
         ...     model_name="mobilenetv3_large",
         ...     data_input_params={"input_size": (224, 224),
-        ...                        "mean": [123.675, 116.28, 103.53],
-        ...                        "std": [58.395, 57.12, 57.375]},
+        ...                        "mean": [0.485, 0.456, 0.406],
+        ...                        "std": [0.229, 0.224, 0.225]},
         ...     label_info=[1, 5, 10]  # Multi-label setup
         ... )
         """
@@ -120,7 +120,7 @@ class EfficientNet:
     def __new__(
         cls,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         task: Literal["multi_class", "multi_label", "h_label"] = "multi_class",
         model_name: Literal[
             "efficientnet_b0",
@@ -149,8 +149,8 @@ def __new__(
 
         Args:
             label_info (LabelInfoTypes): The label information.
-            data_input_params (DataInputParams | dict): The data input parameters that consists
-                of input size, mean and std.
+            data_input_params (DataInputParams | dict | None, optional): The data input parameters that consists
+                of input size, mean and std. Defaults to None.
             freeze_backbone (bool, optional): Whether to freeze the backbone during training. Defaults to False.
                 Note: only multiclass classification supports this argument.
             model_name (Literal["efficientnet_b0", "efficientnet_b1", "efficientnet_b2", "efficientnet_b3",
@@ -192,7 +192,7 @@ class TimmModel:
     def __new__(
         cls,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         task: Literal["multi_class", "multi_label", "h_label"] = "multi_class",
         model_name: str = "tf_efficientnetv2_s.in21k",
         freeze_backbone: bool = False,
@@ -221,8 +221,8 @@ def __new__(
 
         Args:
             label_info (LabelInfoTypes): The label information.
-            data_input_params (DataInputParams | dict): The data input parameters that consists
-                of input size, mean and std.
+            data_input_params (DataInputParams | dict | None, optional): The data input parameters that consists
+                of input size, mean and std. Defaults to None.
             freeze_backbone (bool, optional): Whether to freeze the backbone during training.
                 Note: only multiclass classification supports this argument. Defaults to False.
             model_name (str, optional): The model name. Defaults to "tf_efficientnetv2_s.in21k".
@@ -279,7 +279,7 @@ class TVModel:
     def __new__(
         cls,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         task: Literal["multi_class", "multi_label", "h_label"] = "multi_class",
         model_name: str = "efficientnet_v2_s",
         freeze_backbone: bool = False,
@@ -304,8 +304,8 @@ def __new__(
 
         Args:
             label_info (LabelInfoTypes): The label information.
-            data_input_params (DataInputParams | dict): The data input parameters that consists
-                of input size, mean and std.
+            data_input_params (DataInputParams | dict | None, optional): The data input parameters that consists
+                of input size, mean and std. Defaults to None.
             freeze_backbone (bool, optional): Whether to freeze the backbone during training.
                 Note: only multiclass classification supports this argument. Defaults to False.
             model_name (str, optional): The model name. Defaults to "efficientnet_v2_s".
@@ -361,7 +361,7 @@ class VisionTransformer:
     def __new__(
         cls,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         task: Literal["multi_class", "multi_label", "h_label"] = "multi_class",
         model_name: Literal[
             "vit-tiny",
@@ -393,8 +393,8 @@ def __new__(
 
         Args:
             label_info (LabelInfoTypes): The label information.
-            data_input_params (DataInputParams | dict): The data input parameters that consists
-                of input size, mean and std.
+            data_input_params (DataInputParams | dict | None, optional): The data input parameters that consist
+                of input size, mean and std. Defaults to None.
             freeze_backbone (bool, optional): Whether to freeze the backbone during training.
                 Note: only multiclass classification supports this argument. Defaults to False.
             model_name (Literal["vit-tiny", "vit-small", "vit-base", "vit-large",
diff --git a/library/src/otx/backend/native/models/classification/hlabel_models/base.py b/library/src/otx/backend/native/models/classification/hlabel_models/base.py
index 6664f1ac509..cc227821f1f 100644
--- a/library/src/otx/backend/native/models/classification/hlabel_models/base.py
+++ b/library/src/otx/backend/native/models/classification/hlabel_models/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Class definition for classification model entity used in OTX."""
@@ -40,8 +40,8 @@ class OTXHlabelClsModel(OTXModel):
 
     Args:
         label_info (HLabelInfo): Information about the hierarchical labels.
-        data_input_params (DataInputParams | None, optional): Parameters for image data preprocessing. If None is given,
-            default parameters for the specific model will be used.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for image data
+            preprocessing. If None is given, default parameters for the specific model will be used.
         model_name (str, optional): Name of the model. Defaults to "hlabel_classification_model".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
@@ -56,7 +56,7 @@ class OTXHlabelClsModel(OTXModel):
     def __init__(
         self,
         label_info: HLabelInfo,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: str = "hlabel_classification_model",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
@@ -69,7 +69,6 @@ def __init__(
         super().__init__(
             label_info=label_info,
             data_input_params=data_input_params,
-            task=OTXTaskType.H_LABEL_CLS,
             model_name=model_name,
             optimizer=optimizer,
             scheduler=scheduler,
@@ -243,6 +242,9 @@ def _dispatch_label_info(label_info: LabelInfoTypes) -> LabelInfo:
 
     def get_dummy_input(self, batch_size: int = 1) -> OTXSampleBatch:  # type: ignore[override]
         """Returns a dummy input for classification OV model."""
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         images = torch.stack([torch.rand(3, *self.data_input_params.input_size) for _ in range(batch_size)])
         labels = [torch.LongTensor([0])] * batch_size
         return OTXSampleBatch(images=images, labels=labels)
@@ -267,6 +269,11 @@ def forward_for_tracing(self, image: Tensor) -> Tensor | dict[str, Tensor]:
 
         return self.model(images=image, mode="tensor")
 
+    @property
+    def task(self) -> OTXTaskType:
+        """Return task type."""
+        return OTXTaskType.H_LABEL_CLS
+
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(224, 224), mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375))
+        return DataInputParams(input_size=(224, 224), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
diff --git a/library/src/otx/backend/native/models/classification/hlabel_models/efficientnet.py b/library/src/otx/backend/native/models/classification/hlabel_models/efficientnet.py
index af5808aab5c..f534cb39e73 100644
--- a/library/src/otx/backend/native/models/classification/hlabel_models/efficientnet.py
+++ b/library/src/otx/backend/native/models/classification/hlabel_models/efficientnet.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """EfficientNet-B0 model implementation."""
@@ -36,7 +36,7 @@ class EfficientNetHLabelCls(OTXHlabelClsModel):
     def __init__(
         self,
         label_info: HLabelInfo,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "efficientnet_b0",
             "efficientnet_b1",
@@ -70,6 +70,9 @@ def _create_model(self, head_config: dict | None = None) -> nn.Module:  # type:
         if not isinstance(self.label_info, HLabelInfo):
             raise TypeError(self.label_info)
 
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         backbone = EfficientNetBackbone(model_name=self.model_name, input_size=self.data_input_params.input_size)
 
         copied_head_config = copy(head_config)
diff --git a/library/src/otx/backend/native/models/classification/hlabel_models/mobilenet_v3.py b/library/src/otx/backend/native/models/classification/hlabel_models/mobilenet_v3.py
index 76169a40008..a5d66072254 100644
--- a/library/src/otx/backend/native/models/classification/hlabel_models/mobilenet_v3.py
+++ b/library/src/otx/backend/native/models/classification/hlabel_models/mobilenet_v3.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """MobileNetV3 model implementation."""
@@ -40,7 +40,7 @@ class MobileNetV3HLabelCls(OTXHlabelClsModel):
     def __init__(
         self,
         label_info: HLabelInfo,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["mobilenetv3_large", "mobilenetv3_small"] = "mobilenetv3_large",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
@@ -64,6 +64,9 @@ def _create_model(self, head_config: dict | None = None) -> nn.Module:  # type:
         if not isinstance(self.label_info, HLabelInfo):
             raise TypeError(self.label_info)
 
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         copied_head_config = copy(head_config)
         copied_head_config["step_size"] = (
             ceil(self.data_input_params.input_size[0] / 32),
diff --git a/library/src/otx/backend/native/models/classification/hlabel_models/timm_model.py b/library/src/otx/backend/native/models/classification/hlabel_models/timm_model.py
index a5f024ad4b8..9ca65ab7b77 100644
--- a/library/src/otx/backend/native/models/classification/hlabel_models/timm_model.py
+++ b/library/src/otx/backend/native/models/classification/hlabel_models/timm_model.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """TIMM wrapper model class for OTX."""
@@ -51,7 +51,7 @@ class TimmModelHLabelCls(OTXHlabelClsModel):
     def __init__(
         self,
         label_info: HLabelInfo,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: str = "tf_efficientnetv2_s.in21k",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
@@ -75,6 +75,9 @@ def __init__(
     def _create_model(self, head_config: dict | None = None) -> nn.Module:  # type: ignore[override]
         head_config = head_config if head_config is not None else self.label_info.as_head_config_dict()
         backbone = TimmBackbone(model_name=self.model_name)
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         copied_head_config = copy(head_config)
         copied_head_config["step_size"] = (
             ceil(self.data_input_params.input_size[0] / 32),
diff --git a/library/src/otx/backend/native/models/classification/hlabel_models/torchvision_model.py b/library/src/otx/backend/native/models/classification/hlabel_models/torchvision_model.py
index e760235bdc4..a3d72a83190 100644
--- a/library/src/otx/backend/native/models/classification/hlabel_models/torchvision_model.py
+++ b/library/src/otx/backend/native/models/classification/hlabel_models/torchvision_model.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Torchvision model for the OTX classification."""
@@ -46,7 +46,7 @@ class TVModelHLabelCls(OTXHlabelClsModel):
     def __init__(
         self,
         label_info: HLabelInfo,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: str = "efficientnet_v2_s",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
diff --git a/library/src/otx/backend/native/models/classification/hlabel_models/vit.py b/library/src/otx/backend/native/models/classification/hlabel_models/vit.py
index dc93f288e85..d2606a449f9 100644
--- a/library/src/otx/backend/native/models/classification/hlabel_models/vit.py
+++ b/library/src/otx/backend/native/models/classification/hlabel_models/vit.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """ViT model implementation."""
@@ -55,7 +55,7 @@ class VisionTransformerHLabelCls(ForwardExplainMixInForViT, OTXHlabelClsModel):
     Args:
         label_info (HLabelInfo): Information about the hierarchical labels.
         model_name (str): Name of the Vision Transformer model to use.
-        data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
         optimizer (OptimizerCallable): Callable for the optimizer.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable): Callable for the learning rate scheduler.
         metric (MetricCallable): Callable for the metric.
@@ -67,7 +67,7 @@ class VisionTransformerHLabelCls(ForwardExplainMixInForViT, OTXHlabelClsModel):
     def __init__(
         self,
         label_info: HLabelInfo,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "vit-tiny",
             "vit-small",
@@ -101,6 +101,9 @@ def _create_model(self, head_config: dict | None = None) -> nn.Module:  # type:
         head_config = head_config if head_config is not None else self.label_info.as_head_config_dict()
         if not isinstance(self.label_info, HLabelInfo):
             raise TypeError(self.label_info)
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         init_cfg = [
             {"std": 0.2, "layer": "Linear", "type": "TruncNormal"},
             {"bias": 0.0, "val": 1.0, "layer": "LayerNorm", "type": "Constant"},
diff --git a/library/src/otx/backend/native/models/classification/multiclass_models/base.py b/library/src/otx/backend/native/models/classification/multiclass_models/base.py
index 913881ef575..d078e97880e 100644
--- a/library/src/otx/backend/native/models/classification/multiclass_models/base.py
+++ b/library/src/otx/backend/native/models/classification/multiclass_models/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Class definition for classification model entity used in OTX."""
@@ -37,7 +37,7 @@ class OTXMulticlassClsModel(OTXModel):
         label_info (LabelInfoTypes | int | Sequence): Information about the labels used in the model.
             If `int` is given, label info will be constructed from number of classes,
             if `Sequence` is given, label info will be constructed from the sequence of label names.
-        data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
             If None is given, default parameters for the specific model will be used.
         model_name (str, optional): Name of the model. Defaults to "multiclass_classification_model".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
@@ -50,7 +50,7 @@ class OTXMulticlassClsModel(OTXModel):
     def __init__(
         self,
         label_info: LabelInfoTypes | int | Sequence,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: str = "multiclass_classification_model",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
@@ -61,7 +61,6 @@ def __init__(
         super().__init__(
             label_info=label_info,
             data_input_params=data_input_params,
-            task=OTXTaskType.MULTI_CLASS_CLS,
             model_name=model_name,
             optimizer=optimizer,
             scheduler=scheduler,
@@ -161,6 +160,9 @@ def _reset_prediction_layer(self, num_classes: int) -> None:
 
     def get_dummy_input(self, batch_size: int = 1) -> OTXSampleBatch:  # type: ignore[override]
         """Returns a dummy input for classification model."""
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         images = torch.stack([torch.rand(3, *self.data_input_params.input_size) for _ in range(batch_size)])
         labels = [torch.LongTensor([0])] * batch_size
         return OTXSampleBatch(images=images, labels=labels)
@@ -182,6 +184,11 @@ def forward_explain(self, inputs: OTXSampleBatch) -> OTXPredictionBatch:
             feature_vector=[feature_vector.unsqueeze(0) for feature_vector in outputs["feature_vector"]],
         )
 
+    @property
+    def task(self) -> OTXTaskType:
+        """Return task type."""
+        return OTXTaskType.MULTI_CLASS_CLS
+
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(224, 224), mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375))
+        return DataInputParams(input_size=(224, 224), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
diff --git a/library/src/otx/backend/native/models/classification/multiclass_models/efficientnet.py b/library/src/otx/backend/native/models/classification/multiclass_models/efficientnet.py
index a6a767ab26d..5c0934371dc 100644
--- a/library/src/otx/backend/native/models/classification/multiclass_models/efficientnet.py
+++ b/library/src/otx/backend/native/models/classification/multiclass_models/efficientnet.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """EfficientNet-B0 model implementation."""
@@ -30,7 +30,7 @@ class EfficientNetMulticlassCls(OTXMulticlassClsModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels.
-        data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
         model_name (str, optional): Name of the EfficientNet model variant.
             Defaults to "efficientnet_b0".
         optimizer (OptimizerCallable, optional): Callable for the optimizer.
@@ -45,7 +45,7 @@ class EfficientNetMulticlassCls(OTXMulticlassClsModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "efficientnet_b0",
             "efficientnet_b1",
diff --git a/library/src/otx/backend/native/models/classification/multiclass_models/mobilenet_v3.py b/library/src/otx/backend/native/models/classification/multiclass_models/mobilenet_v3.py
index eea26103a05..846756734c0 100644
--- a/library/src/otx/backend/native/models/classification/multiclass_models/mobilenet_v3.py
+++ b/library/src/otx/backend/native/models/classification/multiclass_models/mobilenet_v3.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """MobileNetV3 model implementation."""
@@ -30,7 +30,7 @@ class MobileNetV3MulticlassCls(OTXMulticlassClsModel):
 
     Args:
         label_info (LabelInfoTypes): The label information.
-        data_input_params (DataInputParams | None, optional): The data input parameters
+        data_input_params (DataInputParams | dict | None, optional): The data input parameters
             such as input size and normalization. If None is given,
             default parameters for the specific model will be used.
         model_name (Literal["mobilenetv3_large", "mobilenetv3_small"], optional): The model name.
@@ -45,7 +45,7 @@ class MobileNetV3MulticlassCls(OTXMulticlassClsModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["mobilenetv3_large", "mobilenetv3_small"] = "mobilenetv3_large",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
diff --git a/library/src/otx/backend/native/models/classification/multiclass_models/timm_model.py b/library/src/otx/backend/native/models/classification/multiclass_models/timm_model.py
index e3823ca6305..1e84cd27608 100644
--- a/library/src/otx/backend/native/models/classification/multiclass_models/timm_model.py
+++ b/library/src/otx/backend/native/models/classification/multiclass_models/timm_model.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """TIMM wrapper model class for OTX."""
@@ -59,7 +59,7 @@ class TimmModelMulticlassCls(OTXMulticlassClsModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: str = "tf_efficientnetv2_s.in21k",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
diff --git a/library/src/otx/backend/native/models/classification/multiclass_models/torchvision_model.py b/library/src/otx/backend/native/models/classification/multiclass_models/torchvision_model.py
index 2ad7be959e4..c5a1a0f0645 100644
--- a/library/src/otx/backend/native/models/classification/multiclass_models/torchvision_model.py
+++ b/library/src/otx/backend/native/models/classification/multiclass_models/torchvision_model.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Torchvision model for the OTX classification."""
@@ -35,7 +35,7 @@ class TVModelMulticlassCls(OTXMulticlassClsModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels.
-        data_input_params (DataInputParams | None, optional): The data input parameters
+        data_input_params (DataInputParams | dict | None, optional): The data input parameters
             such as input size and normalization. If None is given,
             default parameters for the specific model will be used.
         model_name (str, optional): Backbone model name for feature extraction. Defaults to "efficientnet_v2_s".
@@ -49,7 +49,7 @@ class TVModelMulticlassCls(OTXMulticlassClsModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: str = "efficientnet_v2_s",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
diff --git a/library/src/otx/backend/native/models/classification/multiclass_models/vit.py b/library/src/otx/backend/native/models/classification/multiclass_models/vit.py
index 4deddb99a5a..df3ad563439 100644
--- a/library/src/otx/backend/native/models/classification/multiclass_models/vit.py
+++ b/library/src/otx/backend/native/models/classification/multiclass_models/vit.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """ViT model implementation."""
@@ -203,7 +203,7 @@ class VisionTransformerMulticlassCls(ForwardExplainMixInForViT, OTXMulticlassCls
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "vit-tiny",
             "vit-small",
@@ -252,6 +252,9 @@ def _create_model(self, num_classes: int | None = None) -> nn.Module:
         return model
 
     def _build_model(self, num_classes: int) -> nn.Module:
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         init_cfg = [
             {"std": 0.2, "layer": "Linear", "type": "TruncNormal"},
             {"bias": 0.0, "val": 1.0, "layer": "LayerNorm", "type": "Constant"},
diff --git a/library/src/otx/backend/native/models/classification/multilabel_models/base.py b/library/src/otx/backend/native/models/classification/multilabel_models/base.py
index af3dbc0ef4e..d93259118ab 100644
--- a/library/src/otx/backend/native/models/classification/multilabel_models/base.py
+++ b/library/src/otx/backend/native/models/classification/multilabel_models/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Class definition for classification model entity used in OTX."""
@@ -36,7 +36,7 @@ class OTXMultilabelClsModel(OTXModel):
     Args:
         label_info (LabelInfoTypes | int | Sequence): Information about the labels used in the model.
             if `Sequence` is given, label info will be constructed from the sequence of label names.
-        data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
         model_name (str, optional): Name of the model. Defaults to "multilabel_classification_model".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
@@ -48,7 +48,7 @@ class OTXMultilabelClsModel(OTXModel):
     def __init__(
         self,
         label_info: LabelInfoTypes | Sequence,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: str = "multiclass_classification_model",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
@@ -59,7 +59,6 @@ def __init__(
         super().__init__(
             label_info=label_info,
             data_input_params=data_input_params,
-            task=OTXTaskType.MULTI_LABEL_CLS,
             model_name=model_name,
             optimizer=optimizer,
             scheduler=scheduler,
@@ -158,6 +157,9 @@ def forward_for_tracing(self, image: Tensor) -> Tensor | dict[str, Tensor]:
 
     def get_dummy_input(self, batch_size: int = 1) -> OTXSampleBatch:  # type: ignore[override]
         """Returns a dummy input for classification model."""
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         images = torch.stack([torch.rand(3, *self.data_input_params.input_size) for _ in range(batch_size)])
         labels = [torch.LongTensor([0])] * batch_size
         return OTXSampleBatch(images=images, labels=labels)
@@ -175,6 +177,11 @@ def forward_explain(self, inputs: OTXSampleBatch) -> OTXPredictionBatch:
             feature_vector=[feature_vector.unsqueeze(0) for feature_vector in outputs["feature_vector"]],
         )
 
+    @property
+    def task(self) -> OTXTaskType:
+        """Return task type."""
+        return OTXTaskType.MULTI_LABEL_CLS
+
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(224, 224), mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375))
+        return DataInputParams(input_size=(224, 224), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
diff --git a/library/src/otx/backend/native/models/classification/multilabel_models/efficientnet.py b/library/src/otx/backend/native/models/classification/multilabel_models/efficientnet.py
index c605b1f8f08..ee796cb7ce5 100644
--- a/library/src/otx/backend/native/models/classification/multilabel_models/efficientnet.py
+++ b/library/src/otx/backend/native/models/classification/multilabel_models/efficientnet.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """EfficientNet-B0 model implementation."""
@@ -33,7 +33,7 @@ class EfficientNetMultilabelCls(OTXMultilabelClsModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "efficientnet_b0",
             "efficientnet_b1",
diff --git a/library/src/otx/backend/native/models/classification/multilabel_models/mobilenet_v3.py b/library/src/otx/backend/native/models/classification/multilabel_models/mobilenet_v3.py
index 9948f9d39ff..499569f9dad 100644
--- a/library/src/otx/backend/native/models/classification/multilabel_models/mobilenet_v3.py
+++ b/library/src/otx/backend/native/models/classification/multilabel_models/mobilenet_v3.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """MobileNetV3 model implementation."""
@@ -36,7 +36,7 @@ class MobileNetV3MultilabelCls(OTXMultilabelClsModel):
 
     Args:
         label_info (LabelInfoTypes): The label information.
-        data_input_params (DataInputParams | None, optional): The data input parameters
+        data_input_params (DataInputParams | dict | None, optional): The data input parameters
             such as input size and normalization. If None is given,
             default parameters for the specific model will be used.
         model_name (str, optional): The model name. Defaults to "mobilenetv3_large".
@@ -50,7 +50,7 @@ class MobileNetV3MultilabelCls(OTXMultilabelClsModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["mobilenetv3_large", "mobilenetv3_small"] = "mobilenetv3_large",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
diff --git a/library/src/otx/backend/native/models/classification/multilabel_models/timm_model.py b/library/src/otx/backend/native/models/classification/multilabel_models/timm_model.py
index ebfb24dee09..730e95d48d9 100644
--- a/library/src/otx/backend/native/models/classification/multilabel_models/timm_model.py
+++ b/library/src/otx/backend/native/models/classification/multilabel_models/timm_model.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """TIMM wrapper model class for OTX."""
@@ -50,7 +50,7 @@ class TimmModelMultilabelCls(OTXMultilabelClsModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: str = "tf_efficientnetv2_s.in21k",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
diff --git a/library/src/otx/backend/native/models/classification/multilabel_models/torchvision_model.py b/library/src/otx/backend/native/models/classification/multilabel_models/torchvision_model.py
index b188f83a0b4..4040a6bcf88 100644
--- a/library/src/otx/backend/native/models/classification/multilabel_models/torchvision_model.py
+++ b/library/src/otx/backend/native/models/classification/multilabel_models/torchvision_model.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Torchvision model for the OTX classification."""
@@ -48,7 +48,7 @@ class TVModelMultilabelCls(OTXMultilabelClsModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: str = "efficientnet_v2_s",
         freeze_backbone: bool = False,
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
diff --git a/library/src/otx/backend/native/models/classification/multilabel_models/vit.py b/library/src/otx/backend/native/models/classification/multilabel_models/vit.py
index 3b53d91bfa4..b8633e198d1 100644
--- a/library/src/otx/backend/native/models/classification/multilabel_models/vit.py
+++ b/library/src/otx/backend/native/models/classification/multilabel_models/vit.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """ViT model implementation."""
@@ -57,7 +57,7 @@ class VisionTransformerMultilabelCls(ForwardExplainMixInForViT, OTXMultilabelCls
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "vit-tiny",
             "vit-small",
@@ -90,6 +90,9 @@ def __init__(
 
     def _create_model(self, num_classes: int | None = None) -> nn.Module:
         num_classes = num_classes if num_classes is not None else self.num_classes
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         vit_backbone = VisionTransformerBackbone(
             model_name=self.model_name,
             img_size=self.data_input_params.input_size,
diff --git a/library/src/otx/backend/native/models/common/rfdetr_mixin.py b/library/src/otx/backend/native/models/common/rfdetr_mixin.py
index 4e0be959fff..9a904d51408 100644
--- a/library/src/otx/backend/native/models/common/rfdetr_mixin.py
+++ b/library/src/otx/backend/native/models/common/rfdetr_mixin.py
@@ -206,7 +206,7 @@ def _customize_outputs(  # pyrefly: ignore[bad-override]
                     raise TypeError(msg)
             return losses
 
-        image_shapes = [img_info.img_shape for img_info in inputs.imgs_info]  # type: ignore[union-attr]
+        image_shapes = [img_info.ori_shape for img_info in inputs.imgs_info]  # type: ignore[union-attr]
         scores_list, boxes_list, labels_list, masks_list = self.model.postprocess(  # type: ignore[attr-defined]  # pyrefly: ignore[not-callable]
             outputs,
             image_shapes,
diff --git a/library/src/otx/backend/native/models/detection/atss.py b/library/src/otx/backend/native/models/detection/atss.py
index 266775351c1..d67b28256a5 100644
--- a/library/src/otx/backend/native/models/detection/atss.py
+++ b/library/src/otx/backend/native/models/detection/atss.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """ATSS model implementations."""
@@ -43,7 +43,7 @@ class ATSS(OTXDetectionModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels.
-        data_input_params (DataInputParams | None): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
             If None, uses _default_preprocessing_params.
         model_name (Literal, optional): Name of the model to use. Defaults to "atss_mobilenetv2".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
@@ -64,7 +64,7 @@ class ATSS(OTXDetectionModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "atss_mobilenetv2",
             "atss_resnext101",
@@ -201,4 +201,4 @@ def _exporter(self) -> OTXModelExporter:
 
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(800, 992), mean=(0.0, 0.0, 0.0), std=(255.0, 255.0, 255.0))
+        return DataInputParams(input_size=(800, 992), mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0))
diff --git a/library/src/otx/backend/native/models/detection/base.py b/library/src/otx/backend/native/models/detection/base.py
index 223b60c55fc..8684893f3f3 100644
--- a/library/src/otx/backend/native/models/detection/base.py
+++ b/library/src/otx/backend/native/models/detection/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Class definition for detection model entity used in OTX."""
@@ -10,7 +10,7 @@
 import logging as log
 import types
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, Any, Callable, Iterator, Literal, Sequence
+from typing import TYPE_CHECKING, Any, Callable, Iterator, Literal, Sequence, cast
 
 import torch
 from torchmetrics import Metric, MetricCollection
@@ -25,7 +25,6 @@
 from otx.data.entity.base import ImageInfo, OTXBatchLossEntity
 from otx.data.entity.sample import OTXPredictionBatch, OTXSampleBatch
 from otx.data.entity.tile import OTXTileBatchDataEntity
-from otx.data.entity.utils import stack_batch
 from otx.metrics import MetricCallable, MetricInput
 from otx.metrics.fmeasure import FMeasure, MeanAveragePrecisionFMeasureCallable
 from otx.types.export import TaskLevelExportParameters
@@ -81,9 +80,8 @@ def __init__(
     ) -> None:
         super().__init__(
             label_info=label_info,
-            model_name=model_name,
-            task=OTXTaskType.DETECTION,
             data_input_params=data_input_params,
+            model_name=model_name,
             optimizer=optimizer,
             scheduler=scheduler,
             metric=metric,
@@ -182,21 +180,11 @@ def _filter_outputs_by_threshold(self, outputs: OTXPredictionBatch) -> OTXPredic
     def _customize_inputs(
         self,
         entity: OTXSampleBatch,
-        pad_size_divisor: int = 32,
-        pad_value: int = 0,
     ) -> dict[str, Any]:
-        if isinstance(entity.images, list):
-            entity.images, entity.imgs_info = stack_batch(  # type: ignore[assignment]
-                entity.images,
-                entity.imgs_info,  # type: ignore[arg-type]
-                pad_size_divisor=pad_size_divisor,
-                pad_value=pad_value,
-            )
         inputs: dict[str, Any] = {}
 
         inputs["entity"] = entity
         inputs["mode"] = "loss" if self.training else "predict"
-
         return inputs
 
     def _customize_outputs(
@@ -417,16 +405,9 @@ def best_confidence_threshold(self) -> float:
 
     def get_dummy_input(self, batch_size: int = 1) -> OTXSampleBatch:  # type: ignore[override]
         """Returns a dummy input for detection model."""
-        images = [torch.rand(3, *self.data_input_params.input_size) for _ in range(batch_size)]
-        infos = []
-        for i, img in enumerate(images):
-            infos.append(
-                ImageInfo(
-                    img_idx=i,
-                    img_shape=img.shape,
-                    ori_shape=img.shape,
-                ),
-            )
+        images = torch.stack([torch.rand(3, *self.data_input_params.input_size) for _ in range(batch_size)])
+        img_shape = (images.shape[2], images.shape[3])
+        infos = [ImageInfo(img_idx=i, img_shape=img_shape, ori_shape=img_shape) for i in range(batch_size)]
         return OTXSampleBatch(images=images, imgs_info=infos)
 
     def forward_explain(self, inputs: OTXSampleBatch | OTXTileBatchDataEntity) -> OTXPredictionBatch:
@@ -458,7 +439,7 @@ def _forward_explain_detection(
         mode: str = "tensor",
     ) -> dict[str, torch.Tensor]:
         """Forward func of the BaseDetector instance, which located in is in OTXDetectionModel().model."""
-        backbone_feat = self.extract_feat(entity.images)
+        backbone_feat = self.extract_feat(cast("torch.Tensor", entity.images))
         bbox_head_feat = self.bbox_head.forward(backbone_feat)
 
         # Process the first output form bbox detection head: classification scores
@@ -551,4 +532,9 @@ def get_num_anchors(self) -> list[int]:
 
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(640, 640), mean=(0.0, 0.0, 0.0), std=(255.0, 255.0, 255.0))
+        return DataInputParams(input_size=(640, 640), mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0))
+
+    @property
+    def task(self) -> OTXTaskType:
+        """Return task type."""
+        return OTXTaskType.DETECTION
diff --git a/library/src/otx/backend/native/models/detection/d_fine.py b/library/src/otx/backend/native/models/detection/d_fine.py
index 9906d07a090..292ae004189 100644
--- a/library/src/otx/backend/native/models/detection/d_fine.py
+++ b/library/src/otx/backend/native/models/detection/d_fine.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """D-Fine model implementations."""
@@ -35,7 +35,7 @@ class DFine(RTDETR):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels.
-        data_input_params (DataInputParams | None): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
             If None, uses _default_preprocessing_params.
         model_name (literal, optional): Name of the model to use. Defaults to "dfine_hgnetv2_x".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
@@ -60,7 +60,7 @@ class DFine(RTDETR):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "dfine_hgnetv2_n",
             "dfine_hgnetv2_s",
@@ -89,6 +89,9 @@ def __init__(
 
     def _create_model(self, num_classes: int | None = None) -> DETR:
         num_classes = num_classes if num_classes is not None else self.num_classes
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         backbone = HGNetv2(model_name=self.model_name)
         encoder = HybridEncoder(model_name=self.model_name)
         decoder = DFINETransformer(
diff --git a/library/src/otx/backend/native/models/detection/deim.py b/library/src/otx/backend/native/models/detection/deim.py
index 8f8be529cab..606c7fb03cb 100644
--- a/library/src/otx/backend/native/models/detection/deim.py
+++ b/library/src/otx/backend/native/models/detection/deim.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """DEIM-DFine model implementations."""
@@ -46,7 +46,7 @@ class DEIMDFine(RTDETR):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels.
-        data_input_params (DataInputParams | None): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
             If None, uses _default_preprocessing_params.
         model_name (literal, optional): Name of the model to use. Defaults to "deim_dfine_hgnetv2_x".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
@@ -71,7 +71,7 @@ class DEIMDFine(RTDETR):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "deim_dfine_hgnetv2_n",
             "deim_dfine_hgnetv2_s",
@@ -101,6 +101,9 @@ def __init__(
     def _create_model(self, num_classes: int | None = None) -> DETR:
         """Create DEIM-DFine model."""
         num_classes = num_classes if num_classes is not None else self.num_classes
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         backbone = HGNetv2(model_name=self.model_name)
         encoder = HybridEncoder(model_name=self.model_name)
         decoder = DFINETransformer(
diff --git a/library/src/otx/backend/native/models/detection/deimv2.py b/library/src/otx/backend/native/models/detection/deimv2.py
index 6f6d70f3286..5f1c673e0c6 100644
--- a/library/src/otx/backend/native/models/detection/deimv2.py
+++ b/library/src/otx/backend/native/models/detection/deimv2.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """DEIM-DFine model implementations."""
@@ -46,7 +46,7 @@ class DEIMV2(DEIMDFine):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels.
-        data_input_params (DataInputParams | None): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
             If None, uses _default_preprocessing_params.
         model_name (literal, optional): Name of the model to use. Defaults to "deim_dfine_hgnetv2_x".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
@@ -70,7 +70,7 @@ class DEIMV2(DEIMDFine):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "deimv2_x",
             "deimv2_l",
@@ -99,6 +99,9 @@ def __init__(
     def _create_model(self, num_classes: int | None = None) -> DETR:
         """Create DEIM-DFine model."""
         num_classes = num_classes if num_classes is not None else self.num_classes
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         backbone = DINOv3STAs(model_name=self.model_name)
         encoder = HybridEncoder(model_name=self.model_name)
         decoder = DEIMTransformer(
@@ -157,4 +160,4 @@ def _create_model(self, num_classes: int | None = None) -> DETR:
 
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(640, 640), mean=(123.675, 116.280, 103.530), std=(58.395, 57.120, 57.375))
+        return DataInputParams(input_size=(640, 640), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
diff --git a/library/src/otx/backend/native/models/detection/detectors/single_stage_detector.py b/library/src/otx/backend/native/models/detection/detectors/single_stage_detector.py
index cffc5c57ad4..76f5cbce980 100644
--- a/library/src/otx/backend/native/models/detection/detectors/single_stage_detector.py
+++ b/library/src/otx/backend/native/models/detection/detectors/single_stage_detector.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 # Copyright (c) OpenMMLab. All rights reserved.
@@ -11,7 +11,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast
 
 import torch
 
@@ -148,7 +148,7 @@ def loss(
         Returns:
             dict: A dictionary of loss components.
         """
-        x = self.extract_feat(entity.images)
+        x = self.extract_feat(cast("torch.Tensor", entity.images))
         # TODO (sungchul): compare .loss with other forwards and remove duplicated code
         outputs: dict[str, Tensor] = self.bbox_head.prepare_loss_inputs(x, entity)
 
@@ -182,7 +182,7 @@ def predict(
                 - bboxes (Tensor): Has a shape (num_instances, 4),
                     the last dimension 4 arrange as (x1, y1, x2, y2).
         """
-        x = self.extract_feat(entity.images)
+        x = self.extract_feat(cast("torch.Tensor", entity.images))
         return self.bbox_head.predict(x, entity, rescale=rescale)
 
     def export(
diff --git a/library/src/otx/backend/native/models/detection/rfdetr.py b/library/src/otx/backend/native/models/detection/rfdetr.py
index 7045efd062a..5af75663d2f 100644
--- a/library/src/otx/backend/native/models/detection/rfdetr.py
+++ b/library/src/otx/backend/native/models/detection/rfdetr.py
@@ -88,7 +88,7 @@ class RFDETR(RFDETRMixin, OTXDetectionModel):  # pyrefly: ignore[inconsistent-in
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "rfdetr_nano",
             "rfdetr_small",
@@ -156,8 +156,8 @@ def _exporter(self) -> OTXModelExporter:
     @property
     def _default_preprocessing_params(self) -> dict[str, DataInputParams]:  # type: ignore[override]
         """Default preprocessing parameters for RF-DETR models."""
-        imagenet_mean = (123.675, 116.28, 103.53)
-        imagenet_std = (58.395, 57.12, 57.375)
+        imagenet_mean = (0.485, 0.456, 0.406)
+        imagenet_std = (0.229, 0.224, 0.225)
 
         return {
             "rfdetr_nano": DataInputParams(
diff --git a/library/src/otx/backend/native/models/detection/rtdetr.py b/library/src/otx/backend/native/models/detection/rtdetr.py
index 8961fb93dd1..2d5609d2768 100644
--- a/library/src/otx/backend/native/models/detection/rtdetr.py
+++ b/library/src/otx/backend/native/models/detection/rtdetr.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024-2025 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """RTDetr model implementations."""
@@ -27,7 +27,6 @@
 from otx.config.data import TileConfig
 from otx.data.entity.base import OTXBatchLossEntity
 from otx.data.entity.sample import OTXPredictionBatch, OTXSampleBatch
-from otx.data.entity.utils import stack_batch
 from otx.metrics.fmeasure import MeanAveragePrecisionFMeasureCallable
 
 if TYPE_CHECKING:
@@ -47,7 +46,7 @@ class RTDETR(OTXDetectionModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels.
-        data_input_params (DataInputParams | None): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
             If None, uses _default_preprocessing_params.
         model_name (literal, optional): Name of the model to use. Defaults to "rtdetr_50".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
@@ -70,7 +69,7 @@ class RTDETR(OTXDetectionModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["rtdetr_18", "rtdetr_50", "rtdetr_101"] = "rtdetr_50",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -113,6 +112,9 @@ def _create_model(self, num_classes: int | None = None) -> DETR:
             {"params": "^(?=.*(?:encoder|decoder))(?=.*(?:norm|bias)).*$", "weight_decay": 0.0},
         ]
 
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         model = DETR(
             multi_scale=self.multi_scale,
             backbone=backbone,
@@ -133,15 +135,6 @@ def _customize_inputs(
         pad_size_divisor: int = 32,
         pad_value: int = 0,
     ) -> dict[str, Any]:
-        # Stack images if they're in list format
-        if isinstance(entity.images, list):
-            entity.images, entity.imgs_info = stack_batch(  # type: ignore[assignment]
-                entity.images,
-                entity.imgs_info,  # type: ignore[arg-type]
-                pad_size_divisor=pad_size_divisor,
-                pad_value=pad_value,
-            )
-
         targets: list[dict[str, Any]] = []
         # prepare bboxes for the model
         if entity.bboxes is not None and entity.labels is not None:
diff --git a/library/src/otx/backend/native/models/detection/rtmdet.py b/library/src/otx/backend/native/models/detection/rtmdet.py
index b8ac10a4505..ff0bf5a8e2c 100644
--- a/library/src/otx/backend/native/models/detection/rtmdet.py
+++ b/library/src/otx/backend/native/models/detection/rtmdet.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """RTMDet model implementations."""
@@ -43,7 +43,7 @@ class RTMDet(OTXDetectionModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels.
-        data_input_params (DataInputParams | None): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
             If None, uses _default_preprocessing_params.
         model_name (str, optional): Name of the model to use. Defaults to "rtmdet_tiny".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
@@ -63,7 +63,7 @@ class RTMDet(OTXDetectionModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["rtmdet_tiny"] = "rtmdet_tiny",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -161,4 +161,4 @@ def _export_parameters(self) -> TaskLevelExportParameters:
 
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(640, 640), mean=(103.53, 116.28, 123.675), std=(57.375, 57.12, 58.395))
+        return DataInputParams(input_size=(640, 640), mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229))
diff --git a/library/src/otx/backend/native/models/detection/ssd.py b/library/src/otx/backend/native/models/detection/ssd.py
index 0d5342fc487..778e3ab6d26 100644
--- a/library/src/otx/backend/native/models/detection/ssd.py
+++ b/library/src/otx/backend/native/models/detection/ssd.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 # Copyright (c) OpenMMLab. All rights reserved.
@@ -53,7 +53,7 @@ class SSD(OTXDetectionModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels.
-        data_input_params (DataInputParams | None, optional): Parameters for image preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for image preprocessing.
             This parameter contains image input size, mean, and std, that is used to preprocess the input image.
             If None is given, default parameters for the specific model will be used.
             In most cases you don't need to set this parameter unless you change the image size or pretrained weights.
@@ -75,7 +75,7 @@ class SSD(OTXDetectionModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["ssd_mobilenetv2"] = "ssd_mobilenetv2",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -210,7 +210,7 @@ def _get_new_anchors(self, dataset: OTXDataset, anchor_generator: SSDAnchorGener
         """Get new anchors for SSD from OTXDataset."""
         from torchvision.transforms.v2._container import Compose
 
-        from otx.data.transform_libs.torchvision import Resize
+        from otx.data.augmentation.transforms import Resize
 
         target_wh = None
         if isinstance(dataset.transforms, Compose):
@@ -218,6 +218,9 @@ def _get_new_anchors(self, dataset: OTXDataset, anchor_generator: SSDAnchorGener
                 if isinstance(transform, Resize):
                     target_wh = transform.scale
         if target_wh is None:
+            if self.data_input_params.input_size is None:
+                msg = "input_size should not be None."
+                raise ValueError(msg)
             target_wh = list(reversed(self.data_input_params.input_size))  # type: ignore[assignment]
             msg = f"Cannot get target_wh from the dataset. Assign it with the default value: {target_wh}"
             logger.warning(msg)
@@ -374,4 +377,4 @@ def on_load_checkpoint(self, checkpoint: dict[str, Any]) -> None:
 
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(864, 864), mean=(0.0, 0.0, 0.0), std=(255.0, 255.0, 255.0))
+        return DataInputParams(input_size=(864, 864), mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0))
diff --git a/library/src/otx/backend/native/models/detection/yolox.py b/library/src/otx/backend/native/models/detection/yolox.py
index 14153c98ebc..9601fe1f62c 100644
--- a/library/src/otx/backend/native/models/detection/yolox.py
+++ b/library/src/otx/backend/native/models/detection/yolox.py
@@ -1,11 +1,12 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """YOLOX model implementations."""
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, ClassVar, Literal
+import dataclasses
+from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast
 
 from torch.export import Dim
 
@@ -30,6 +31,7 @@
 if TYPE_CHECKING:
     from pathlib import Path
 
+    import torch
     from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable
 
     from otx.backend.native.schedulers import LRSchedulerListCallable
@@ -37,6 +39,13 @@
     from otx.types.label import LabelInfoTypes
 
 
+# YOLOX-S/L/X pretrained weights (MMDet) were trained on raw [0, 255] BGR images —
+# no ImageNet normalization was applied during pretraining.
+# These models are NOT compatible with 16-bit images because the uint8 pixel range
+# assumption is baked into the weights.
+_RAW_UINT8_MODELS: frozenset[str] = frozenset({"yolox_s", "yolox_l", "yolox_x"})
+
+
 class YOLOX(OTXDetectionModel):
     """OTX Detection model class for YOLOX.
 
@@ -45,7 +54,7 @@ class YOLOX(OTXDetectionModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels.
-        data_input_params (DataInputParams | None, optional): Parameters for image preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for image preprocessing.
             This parameter contains image input size, mean, and std, that is used to preprocess the input image.
             If None is given, default parameters for the specific model will be used.
             In most cases you don't need to set this parameter unless you change the image size or pretrained weights.
@@ -75,7 +84,7 @@ class YOLOX(OTXDetectionModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["yolox_tiny", "yolox_s", "yolox_l", "yolox_x"] = "yolox_s",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -130,14 +139,6 @@ def _create_model(self, num_classes: int | None = None) -> SingleStageDetector:
 
         return model
 
-    def _customize_inputs(
-        self,
-        entity: OTXSampleBatch,
-        pad_size_divisor: int = 32,
-        pad_value: int = 114,  # YOLOX uses 114 as pad_value
-    ) -> dict[str, Any]:
-        return super()._customize_inputs(entity=entity, pad_size_divisor=pad_size_divisor, pad_value=pad_value)
-
     @property
     def _exporter(self) -> OTXModelExporter:
         """Creates OTXModelExporter object that can export the model."""
@@ -197,12 +198,31 @@ def export(
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
         return {
-            "yolox_tiny": DataInputParams(
-                input_size=(640, 640), mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375)
-            ),
-            # TODO(@kprokofi): this looks like a bug. The image should be normalized before training.
-            # issue: https://github.com/open-edge-platform/training_extensions/issues/5023
+            "yolox_tiny": DataInputParams(input_size=(640, 640), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
             "yolox_s": DataInputParams(input_size=(640, 640), mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0)),
             "yolox_l": DataInputParams(input_size=(640, 640), mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0)),
             "yolox_x": DataInputParams(input_size=(640, 640), mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0)),
         }
+
+    def _customize_inputs(self, entity: OTXSampleBatch) -> dict[str, Any]:
+        if self.model_name in _RAW_UINT8_MODELS:
+            if entity.imgs_info is not None:
+                for info in entity.imgs_info:
+                    if info is not None and getattr(info, "bit_depth", 8) > 8:
+                        msg = (
+                            f"YOLOX ({self.model_name}) does not support images with bit_depth > 8. "
+                            f"Got bit_depth={info.bit_depth}. "
+                            "Pretrained weights require [0, 255] uint8-range inputs. "
+                            "Use yolox_tiny or a model with normalization for 16-bit images."
+                        )
+                        raise RuntimeError(msg)
+
+            inputs = super()._customize_inputs(entity)
+            # The CPU pipeline always scales images to [0, 1] float.
+            # YOLOX-S/L/X pretrained weights expect [0, 255] float, so rescale here.
+            # We create a new entity so the original (with [0, 1] images) remains intact.
+            images_255 = cast("torch.Tensor", entity.images).mul(255.0)
+            inputs["entity"] = dataclasses.replace(inputs["entity"], images=images_255)
+            return inputs
+
+        return super()._customize_inputs(entity)
diff --git a/library/src/otx/backend/native/models/instance_segmentation/base.py b/library/src/otx/backend/native/models/instance_segmentation/base.py
index 838a2eac940..6d792ba0b33 100644
--- a/library/src/otx/backend/native/models/instance_segmentation/base.py
+++ b/library/src/otx/backend/native/models/instance_segmentation/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Class definition for instance segmentation model entity used in OTX."""
@@ -11,7 +11,7 @@
 import logging as log
 import types
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, Any, Callable, Iterator, Literal, Sequence
+from typing import TYPE_CHECKING, Any, Callable, Iterator, Literal, Sequence, cast
 
 import torch
 from torch import Tensor
@@ -58,7 +58,7 @@ class OTXInstanceSegModel(OTXModel):
         label_info (LabelInfoTypes | int | Sequence): Information about the labels used in the model.
             If `int` is given, label info will be constructed from number of classes,
             if `Sequence` is given, label info will be constructed from the sequence of label names.
-        data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
             If None is given, default parameters for the specific model will be used.
         model_name (str, optional): Name of the model. Defaults to "inst_segm_model".
         optimizer (OptimizerCallable, optional): Optimizer for the model. Defaults to DefaultOptimizerCallable.
@@ -74,7 +74,7 @@ class OTXInstanceSegModel(OTXModel):
     def __init__(
         self,
         label_info: LabelInfoTypes | int | Sequence,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: str = "inst_segm_model",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -85,7 +85,6 @@ def __init__(
         super().__init__(
             label_info=label_info,
             data_input_params=data_input_params,
-            task=OTXTaskType.INSTANCE_SEGMENTATION,
             model_name=model_name,
             optimizer=optimizer,
             scheduler=scheduler,
@@ -470,16 +469,9 @@ def _convert_pred_entity_to_compute_metric(
 
     def get_dummy_input(self, batch_size: int = 1) -> OTXSampleBatch:  # type: ignore[override]
         """Returns a dummy input for instance segmentation model."""
-        images = [torch.rand(3, *self.data_input_params.input_size) for _ in range(batch_size)]
-        infos = []
-        for i, img in enumerate(images):
-            infos.append(
-                ImageInfo(
-                    img_idx=i,
-                    img_shape=img.shape,
-                    ori_shape=img.shape,
-                ),
-            )
+        images = torch.rand(batch_size, 3, *self.data_input_params.input_size)
+        img_shape = (images.shape[2], images.shape[3])
+        infos = [ImageInfo(img_idx=i, img_shape=img_shape, ori_shape=img_shape) for i in range(batch_size)]
         return OTXSampleBatch(images=images, imgs_info=infos)
 
     def forward_explain(self, inputs: OTXSampleBatch) -> OTXPredictionBatch:
@@ -510,7 +502,8 @@ def _forward_explain_inst_seg(
         mode: str = "tensor",  # noqa: ARG004
     ) -> dict[str, Tensor]:
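-        """Forward func of the BaseDetector instance, which located in is in ExplainableOTXInstanceSegModel().model."""
+        """Forward func of the BaseDetector instance, which is located in ExplainableOTXInstanceSegModel().model."""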
-        x = self.backbone(entity.images) if isinstance(self, MaskRCNN) else self.extract_feat(entity.images)
+        _images = cast("torch.Tensor", entity.images)
+        x = self.backbone(_images) if isinstance(self, MaskRCNN) else self.extract_feat(_images)
 
         feature_vector = self.feature_vector_fn(x)
         predictions = self.get_results_from_head(x, entity)
@@ -553,7 +546,7 @@ def get_results_from_head(
         if isinstance(self, MaskRCNNTV):
             ori_shapes = [img_info.ori_shape for img_info in entity.imgs_info]  # type: ignore[union-attr]
             img_shapes = [img_info.img_shape for img_info in entity.imgs_info]  # type: ignore[union-attr]
-            image_list = ImageList(entity.images, img_shapes)
+            image_list = ImageList(cast("torch.Tensor", entity.images), img_shapes)
             proposals, _ = self.model.rpn(image_list, x)
             detections, _ = self.model.roi_heads(
                 x,
@@ -616,4 +609,9 @@ def _restore_model_forward(self) -> None:
 
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(1024, 1024), mean=(103.53, 116.28, 123.675), std=(57.375, 57.12, 58.395))
+        return DataInputParams(input_size=(1024, 1024), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
+
+    @property
+    def task(self) -> OTXTaskType:
+        """Return task type."""
+        return OTXTaskType.INSTANCE_SEGMENTATION
diff --git a/library/src/otx/backend/native/models/instance_segmentation/maskrcnn.py b/library/src/otx/backend/native/models/instance_segmentation/maskrcnn.py
index 1dab07d1386..592a85fb7fe 100644
--- a/library/src/otx/backend/native/models/instance_segmentation/maskrcnn.py
+++ b/library/src/otx/backend/native/models/instance_segmentation/maskrcnn.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """MaskRCNN model implementations."""
@@ -48,7 +48,7 @@ class MaskRCNN(OTXInstanceSegModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels used in the model.
-        data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
             If None is given, default parameters for the specific model will be used.
         model_name (str, optional): Name of the model. Defaults to "maskrcnn_resnet_50".
         optimizer (OptimizerCallable, optional): Optimizer for the model. Defaults to DefaultOptimizerCallable.
@@ -74,7 +74,7 @@ class MaskRCNN(OTXInstanceSegModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "maskrcnn_resnet_50",
             "maskrcnn_efficientnet_b2b",
@@ -391,16 +391,16 @@ def _optimization_config(self) -> dict[str, Any]:
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
         return {
             "maskrcnn_resnet_50": DataInputParams(
-                input_size=(1024, 1024), mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375)
+                input_size=(1024, 1024), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)
             ),
             # TODO(@kprokofi): The std values of (1.0, 1.0, 1.0) for maskrcnn_efficientnet_b2b
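-            # differ from other variants which use (58.395, 57.12, 57.375), which may indicate missing normalization.
+            # differ from other variants which use (0.229, 0.224, 0.225), which may indicate missing normalization.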
             # issue: https://github.com/open-edge-platform/training_extensions/issues/5023
             "maskrcnn_efficientnet_b2b": DataInputParams(
-                input_size=(1024, 1024), mean=(123.675, 116.28, 103.53), std=(1.0, 1.0, 1.0)
+                input_size=(1024, 1024), mean=(0.485, 0.456, 0.406), std=(1.0, 1.0, 1.0)
             ),
             "maskrcnn_swin_tiny": DataInputParams(
-                input_size=(1344, 1344), mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375)
+                input_size=(1344, 1344), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)
             ),
         }
 
diff --git a/library/src/otx/backend/native/models/instance_segmentation/maskrcnn_tv.py b/library/src/otx/backend/native/models/instance_segmentation/maskrcnn_tv.py
index 5c8e62d4a1d..0106121aabb 100644
--- a/library/src/otx/backend/native/models/instance_segmentation/maskrcnn_tv.py
+++ b/library/src/otx/backend/native/models/instance_segmentation/maskrcnn_tv.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """TV MaskRCNN model implementations."""
@@ -51,7 +51,7 @@ class MaskRCNNTV(OTXInstanceSegModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels used in the model.
-        data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
             If None is given, default parameters for the specific model will be used.
         model_name (str, optional): Name of the model. Defaults to "maskrcnn_resnet_50".
         optimizer (OptimizerCallable, optional): Optimizer for the model. Defaults to DefaultOptimizerCallable.
@@ -71,7 +71,7 @@ class MaskRCNNTV(OTXInstanceSegModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["maskrcnn_resnet_50"] = "maskrcnn_resnet_50",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
diff --git a/library/src/otx/backend/native/models/instance_segmentation/rfdetr_inst.py b/library/src/otx/backend/native/models/instance_segmentation/rfdetr_inst.py
index 09919578109..0bbe19761b4 100644
--- a/library/src/otx/backend/native/models/instance_segmentation/rfdetr_inst.py
+++ b/library/src/otx/backend/native/models/instance_segmentation/rfdetr_inst.py
@@ -89,7 +89,7 @@ class RFDETRInst(RFDETRMixin, OTXInstanceSegModel):  # pyrefly: ignore[inconsist
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal[
             "rfdetr_seg_n",
             "rfdetr_seg_s",
@@ -156,8 +156,8 @@ def _exporter(self) -> OTXModelExporter:
     @property
     def _default_preprocessing_params(self) -> dict[str, DataInputParams]:  # type: ignore[override]
         """Default preprocessing parameters for RF-DETR segmentation models."""
-        imagenet_mean = (123.675, 116.28, 103.53)
-        imagenet_std = (58.395, 57.12, 57.375)
+        imagenet_mean = (0.485, 0.456, 0.406)
+        imagenet_std = (0.229, 0.224, 0.225)
 
         return {
             "rfdetr_seg_n": DataInputParams(
diff --git a/library/src/otx/backend/native/models/instance_segmentation/rtmdet_inst.py b/library/src/otx/backend/native/models/instance_segmentation/rtmdet_inst.py
index 752d4728dc1..c7f21e48cd1 100644
--- a/library/src/otx/backend/native/models/instance_segmentation/rtmdet_inst.py
+++ b/library/src/otx/backend/native/models/instance_segmentation/rtmdet_inst.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """RTMDetInst model implementations."""
@@ -43,7 +43,7 @@ class RTMDetInst(OTXInstanceSegModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the labels used in the model.
-        data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
             If None is given, default parameters for the specific model will be used.
         model_name (str, optional): Name of the model. Defaults to "rtmdet_inst_tiny".
         optimizer (OptimizerCallable, optional): Optimizer for the model. Defaults to DefaultOptimizerCallable.
@@ -66,7 +66,7 @@ class RTMDetInst(OTXInstanceSegModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["rtmdet_inst_tiny"] = "rtmdet_inst_tiny",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -194,4 +194,4 @@ def forward_for_tracing(self, inputs: Tensor) -> tuple[Tensor, ...]:
 
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(640, 640), mean=(103.53, 116.28, 123.675), std=(57.375, 57.12, 58.395))
+        return DataInputParams(input_size=(640, 640), mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229))
diff --git a/library/src/otx/backend/native/models/instance_segmentation/segmentors/maskrcnn_tv.py b/library/src/otx/backend/native/models/instance_segmentation/segmentors/maskrcnn_tv.py
index 6bd851d5b38..7daeb5437ef 100644
--- a/library/src/otx/backend/native/models/instance_segmentation/segmentors/maskrcnn_tv.py
+++ b/library/src/otx/backend/native/models/instance_segmentation/segmentors/maskrcnn_tv.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Torchvision MaskRCNN model with forward method accepting TorchDataBatch."""
@@ -6,7 +6,7 @@
 from __future__ import annotations
 
 from collections import OrderedDict
-from typing import TYPE_CHECKING, Any, ClassVar
+from typing import TYPE_CHECKING, Any, ClassVar, cast
 
 import torch
 from torch import Tensor, nn
@@ -36,7 +36,7 @@ def forward(self, entity: OTXSampleBatch) -> dict[str, Tensor] | list[dict[str,
         ori_shapes = [img_info.ori_shape for img_info in entity.imgs_info]  # type: ignore[union-attr]
         img_shapes = [img_info.img_shape for img_info in entity.imgs_info]  # type: ignore[union-attr]
 
-        image_list = ImageList(entity.images, img_shapes)
+        image_list = ImageList(cast("torch.Tensor", entity.images), img_shapes)
         targets = []
         if self.training:
             for i in range(len(entity.imgs_info)):  # type: ignore[arg-type]
diff --git a/library/src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py b/library/src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py
index df3cfd5a34f..7e00d98f2da 100644
--- a/library/src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py
+++ b/library/src/otx/backend/native/models/instance_segmentation/segmentors/two_stage.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """TwoStageDetector.
@@ -9,6 +9,8 @@
 
 from __future__ import annotations
 
+from typing import cast
+
 import torch
 from torch import Tensor, nn
 
@@ -156,7 +158,7 @@ def loss(self, batch_inputs: OTXSampleBatch) -> dict[str, Tensor]:
         Returns:
             dict: A dictionary of loss components
         """
-        x = self.extract_feat(batch_inputs.images)
+        x = self.extract_feat(cast("Tensor", batch_inputs.images))
 
         # Copy data entity and set gt_labels to 0 in RPN
         rpn_entity = OTXSampleBatch(
@@ -218,7 +220,7 @@ def predict(
         if not self.with_bbox:
             msg = "Bbox head is not implemented."
             raise NotImplementedError(msg)
-        x = self.extract_feat(entity.images)
+        x = self.extract_feat(cast("Tensor", entity.images))
 
         rpn_results_list = self.rpn_head.predict(x, entity, rescale=False)
 
diff --git a/library/src/otx/backend/native/models/keypoint_detection/base.py b/library/src/otx/backend/native/models/keypoint_detection/base.py
index b2c8e5dd268..94b0ab52ac9 100644
--- a/library/src/otx/backend/native/models/keypoint_detection/base.py
+++ b/library/src/otx/backend/native/models/keypoint_detection/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Class definition for keypoint detection model entity used in OTX."""
@@ -32,8 +32,8 @@ class OTXKeypointDetectionModel(OTXModel):
         label_info (LabelInfoTypes | int | Sequence): Information about the labels used in the model.
             If `int` is given, label info will be constructed from number of classes,
             if `Sequence` is given, label info will be constructed from the sequence of label names.
-        data_input_params (DataInputParams | None, optional): Parameters for image data preprocessing. If None is given,
-            default parameters for the specific model will be used.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for image data
+            preprocessing. If None is given, default parameters for the specific model will be used.
         model_name (str, optional): Name of the model. Defaults to "keypoint_detection_model".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
@@ -46,7 +46,7 @@ class OTXKeypointDetectionModel(OTXModel):
     def __init__(
         self,
         label_info: LabelInfoTypes | int | Sequence,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: str = "keypoint_detection_model",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -56,7 +56,6 @@ def __init__(
         super().__init__(
             label_info=label_info,
             data_input_params=data_input_params,
-            task=OTXTaskType.KEYPOINT_DETECTION,
             model_name=model_name,
             optimizer=optimizer,
             scheduler=scheduler,
@@ -94,6 +93,9 @@ def _customize_outputs(
         if inputs.imgs_info is None:
             msg = "The input image information is not provided."
             raise ValueError(msg)
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         for i, output in enumerate(outputs):
             if not isinstance(output, tuple):
                 raise TypeError(output)
@@ -130,6 +132,9 @@ def _customize_outputs(
     def configure_metric(self) -> None:
         """Configure the metric."""
         super().configure_metric()
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         self._metric.input_size = tuple(self.data_input_params.input_size)
 
     def _convert_pred_entity_to_compute_metric(  # type: ignore[override]
@@ -209,4 +214,9 @@ def _export_parameters(self) -> TaskLevelExportParameters:
 
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(640, 640), mean=(0.0, 0.0, 0.0), std=(255.0, 255.0, 255.0))
+        return DataInputParams(input_size=(512, 512), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
+
+    @property
+    def task(self) -> OTXTaskType:
+        """Return task type."""
+        return OTXTaskType.KEYPOINT_DETECTION
diff --git a/library/src/otx/backend/native/models/keypoint_detection/rtmpose.py b/library/src/otx/backend/native/models/keypoint_detection/rtmpose.py
index c7981e97899..6d9ad3bfc5b 100644
--- a/library/src/otx/backend/native/models/keypoint_detection/rtmpose.py
+++ b/library/src/otx/backend/native/models/keypoint_detection/rtmpose.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """RTMPose model implementations."""
@@ -35,14 +35,10 @@ class RTMPose(OTXKeypointDetectionModel):
         "rtmpose_tiny": "https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth",
     }
 
-    _default_preprocessing_params: ClassVar[dict[str, DataInputParams] | DataInputParams] = {
-        "rtmpose_tiny": DataInputParams(input_size=(640, 640), mean=(0.0, 0.0, 0.0), std=(255.0, 255.0, 255.0)),
-    }
-
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["rtmpose_tiny"] = "rtmpose_tiny",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -61,6 +57,9 @@ def __init__(
 
     def _create_model(self, num_classes: int | None = None) -> nn.Module:
         num_classes = num_classes if num_classes is not None else self.num_classes
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         backbone = CSPNeXt(model_name=self.model_name)
         head = RTMCCHead(
             out_channels=num_classes,
diff --git a/library/src/otx/backend/native/models/segmentation/base.py b/library/src/otx/backend/native/models/segmentation/base.py
index 3d71603fd5d..b7456959cb3 100644
--- a/library/src/otx/backend/native/models/segmentation/base.py
+++ b/library/src/otx/backend/native/models/segmentation/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Class definition for detection model entity used in OTX."""
@@ -45,7 +45,7 @@ class OTXSegmentationModel(OTXModel):
         label_info (LabelInfoTypes | int | Sequence): Information about the labels used in the model.
             If `int` is given, label info will be constructed from number of classes,
             if `Sequence` is given, label info will be constructed from the sequence of label names.
-        data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
         model_name (str, optional): Name of the model. Defaults to "otx_segmentation_model".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
@@ -58,7 +58,7 @@ class OTXSegmentationModel(OTXModel):
     def __init__(
         self,
         label_info: LabelInfoTypes | int | Sequence,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: str = "otx_segmentation_model",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -69,7 +69,6 @@ def __init__(
         super().__init__(
             label_info=label_info,
             data_input_params=data_input_params,
-            task=OTXTaskType.SEMANTIC_SEGMENTATION,
             model_name=model_name,
             optimizer=optimizer,
             scheduler=scheduler,
@@ -179,11 +178,7 @@ def _convert_pred_entity_to_compute_metric(
             raise ValueError(msg)
 
         return [
-            {
-                "preds": pred_mask,
-                "target": target_mask,
-            }
-            for pred_mask, target_mask in zip(preds.masks, inputs.masks)
+            {"preds": pred_mask, "target": target_mask} for pred_mask, target_mask in zip(preds.masks, inputs.masks)
         ]
 
     @staticmethod
@@ -297,4 +292,9 @@ def get_dummy_input(self, batch_size: int = 1) -> OTXSampleBatch:  # type: ignor
 
     @property
     def _default_preprocessing_params(self) -> DataInputParams | dict[str, DataInputParams]:
-        return DataInputParams(input_size=(512, 512), mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375))
+        return DataInputParams(input_size=(512, 512), mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
+
+    @property
+    def task(self) -> OTXTaskType:
+        """Return task type."""
+        return OTXTaskType.SEMANTIC_SEGMENTATION
diff --git a/library/src/otx/backend/native/models/segmentation/dino_v2_seg.py b/library/src/otx/backend/native/models/segmentation/dino_v2_seg.py
index 266dbfb0830..2b6bb834d78 100644
--- a/library/src/otx/backend/native/models/segmentation/dino_v2_seg.py
+++ b/library/src/otx/backend/native/models/segmentation/dino_v2_seg.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """DinoV2Seg model implementations."""
@@ -35,7 +35,7 @@ class DinoV2Seg(OTXSegmentationModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the hierarchical labels.
-        data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
         model_name (Literal, optional): Name of the model. Defaults to "dinov2-small-seg".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
@@ -52,7 +52,7 @@ class DinoV2Seg(OTXSegmentationModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["dinov2-small-seg"] = "dinov2-small-seg",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
@@ -75,6 +75,9 @@ def _create_model(self, num_classes: int | None = None) -> nn.Module:
         # initialize backbones
         num_classes = num_classes if num_classes is not None else self.num_classes
 
+        if self.data_input_params.input_size is None:
+            msg = "input_size should not be None."
+            raise ValueError(msg)
         backbone = VisionTransformerBackbone(model_name=self.model_name, img_size=self.data_input_params.input_size)
         backbone.forward = partial(  # type: ignore[method-assign]
             backbone.get_intermediate_layers,
diff --git a/library/src/otx/backend/native/models/segmentation/litehrnet.py b/library/src/otx/backend/native/models/segmentation/litehrnet.py
index 2bd9859bc3c..46c96283024 100644
--- a/library/src/otx/backend/native/models/segmentation/litehrnet.py
+++ b/library/src/otx/backend/native/models/segmentation/litehrnet.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """LiteHRNet model implementations."""
@@ -34,7 +34,7 @@ class LiteHRNet(OTXSegmentationModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the hierarchical labels.
-        data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
         model_name (Literal, optional): Name of the model. Defaults to "lite_hrnet_18".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
@@ -47,7 +47,7 @@ class LiteHRNet(OTXSegmentationModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["lite_hrnet_s", "lite_hrnet_18", "lite_hrnet_x"] = "lite_hrnet_18",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
diff --git a/library/src/otx/backend/native/models/segmentation/segnext.py b/library/src/otx/backend/native/models/segmentation/segnext.py
index 2b1baa33ed8..cbc36467191 100644
--- a/library/src/otx/backend/native/models/segmentation/segnext.py
+++ b/library/src/otx/backend/native/models/segmentation/segnext.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """SegNext model implementations."""
@@ -30,7 +30,7 @@ class SegNext(OTXSegmentationModel):
 
     Args:
         label_info (LabelInfoTypes): Information about the hierarchical labels.
-        data_input_params (DataInputParams | None, optional): Parameters for the image data preprocessing.
+        data_input_params (DataInputParams | dict | None, optional): Parameters for the image data preprocessing.
         model_name (Literal, optional): Name of the model. Defaults to "segnext_small".
         optimizer (OptimizerCallable, optional): Callable for the optimizer. Defaults to DefaultOptimizerCallable.
         scheduler (LRSchedulerCallable | LRSchedulerListCallable, optional): Callable for the learning rate scheduler.
@@ -43,7 +43,7 @@ class SegNext(OTXSegmentationModel):
     def __init__(
         self,
         label_info: LabelInfoTypes,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
         model_name: Literal["segnext_tiny", "segnext_small", "segnext_base"] = "segnext_small",
         optimizer: OptimizerCallable = DefaultOptimizerCallable,
         scheduler: LRSchedulerCallable | LRSchedulerListCallable = DefaultSchedulerCallable,
diff --git a/library/src/otx/backend/openvino/models/base.py b/library/src/otx/backend/openvino/models/base.py
index 7bc666f5cf0..e54097eb779 100644
--- a/library/src/otx/backend/openvino/models/base.py
+++ b/library/src/otx/backend/openvino/models/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2025 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 """Class definition for base model entity used in OTX."""
 
@@ -421,16 +421,9 @@ def get_dummy_input(self, batch_size: int = 1) -> OTXSampleBatch:
         Returns:
             OTXSampleBatch: Dummy input data.
         """
-        images = [torch.rand(3, 224, 224) for _ in range(batch_size)]
-        infos = []
-        for i, img in enumerate(images):
-            infos.append(
-                ImageInfo(
-                    img_idx=i,
-                    img_shape=img.shape,
-                    ori_shape=img.shape,
-                ),
-            )
+        images = torch.rand(batch_size, 3, 224, 224)
+        img_shape = (224, 224)
+        infos = [ImageInfo(img_idx=i, img_shape=img_shape, ori_shape=img_shape) for i in range(batch_size)]
         return OTXSampleBatch(images=images, imgs_info=infos)
 
     def __call__(self, *args, **kwds):
diff --git a/library/src/otx/cli/cli.py b/library/src/otx/cli/cli.py
index 0a2be4e59c3..fc54fb6d81e 100644
--- a/library/src/otx/cli/cli.py
+++ b/library/src/otx/cli/cli.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """CLI entrypoints."""
@@ -15,7 +15,6 @@
 from rich.console import Console
 
 from otx import OTX_LOGO, __version__
-from otx.backend.native.models.base import DataInputParams
 from otx.cli.utils import absolute_path
 from otx.cli.utils.help_formatter import CustomHelpFormatter
 from otx.cli.utils.jsonargparse import get_short_docstring, patch_update_configs
@@ -322,11 +321,15 @@ def instantiate_classes(self, instantiate_engine: bool = True) -> None:
                     "Input size is not specified in the datamodule. Ensure that the datamodule has a valid input size."
                 )
                 raise ValueError(msg)
-            model_config.init_args["data_input_params"] = DataInputParams(
-                input_size=self.datamodule.input_size,
-                mean=self.datamodule.input_mean,
-                std=self.datamodule.input_std,
-            ).as_dict()
+            # Only pass what the datamodule knows; mean/std may be None when
+            # normalization lives in the augmentation pipeline. Model defaults
+            # fill in any missing values inside _configure_preprocessing_params.
+            _dip: dict[str, Any] = {"input_size": self.datamodule.input_size}
+            if self.datamodule.input_mean is not None:
+                _dip["mean"] = self.datamodule.input_mean
+            if self.datamodule.input_std is not None:
+                _dip["std"] = self.datamodule.input_std
+            model_config.init_args["data_input_params"] = _dip
 
             # Instantiate the model and needed components
             self.model = self.instantiate_model(model_config=model_config)
diff --git a/library/src/otx/cli/utils/jsonargparse.py b/library/src/otx/cli/utils/jsonargparse.py
index d64dd42166b..5fb871c20e5 100644
--- a/library/src/otx/cli/utils/jsonargparse.py
+++ b/library/src/otx/cli/utils/jsonargparse.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Functions related to jsonargparse."""
@@ -230,18 +230,23 @@ def list_override(configs: Namespace, key: str, overrides: list, convert_dict_to
     """
     if key not in configs or configs[key] is None:
         return
+
+    base_list = configs[key]
+    override_class_paths = []
+
     for target in overrides:
         class_path = target.get("class_path", None)
         if class_path is None:
             msg = "class_path is required in the override list."
             raise ValueError(msg)
+        override_class_paths.append(class_path)
 
-        item = next((item for item in configs[key] if item["class_path"] == class_path), None)
+        item = next((item for item in base_list if item["class_path"] == class_path), None)
         if item is not None:
             Namespace(item).update(target)
         else:
             converted_target = dict_to_namespace(target) if convert_dict_to_namespace else target
-            configs[key].append(converted_target)
+            base_list.append(converted_target)
 
 
 def apply_override(cfg: Namespace, overrides: Namespace) -> None:
diff --git a/library/src/otx/config/data.py b/library/src/otx/config/data.py
index 8a238a87862..02b8e0a0662 100644
--- a/library/src/otx/config/data.py
+++ b/library/src/otx/config/data.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Config data type objects for data."""
@@ -11,7 +11,55 @@
 from dataclasses import dataclass, field
 from typing import Any
 
-from otx.types.transformer_libs import TransformLibType
+
+@dataclass
+class IntensityConfig:
+    """Configuration for intensity mapping (uint8, uint16, or other high-bit-depth inputs).
+
+    Controls how raw pixel values are converted to float32 [0, 1] before augmentations.
+    For standard uint8 images the default ``mode="scale_to_unit"`` divides by 255.
+    For high-bit-depth inputs (uint16 thermal, medical, etc.) select an appropriate mode.
+
+    Supported modes:
+        - ``"scale_to_unit"``: Divide by ``max_value`` and clamp to [0, 1].
+          Default for both uint8 (max_value=255) and uint16 (max_value=65535).
+        - ``"window"``: Window/level mapping for CT-style imaging.
+          Clips to ``[window_center - window_width/2, window_center + window_width/2]``
+          then normalizes to [0, 1].
+        - ``"percentile"``: Per-image percentile clipping for microscopy / variable-range data.
+          Clips to ``[percentile_low, percentile_high]`` quantiles then normalizes.
+        - ``"range_scale"``: Multiply by ``scale_factor``, clip to ``[min_value, max_value]``,
+          normalize to [0, 1].  Designed for thermal cameras where raw pixel values
+          need a conversion factor and a physical temperature range (see ``process_raw_thermal.py``).
+
+    Attributes:
+        storage_dtype: Input storage dtype: ``"uint8"`` | ``"uint16"`` | ``"int16"`` | ``"float32"``.
+            Determines the Polars/Datumaro schema used for image decode.
+        max_value: Divisor for ``"scale_to_unit"`` mode; upper clip bound (required) for ``"range_scale"`` mode.
+            ``None`` = auto in ``"scale_to_unit"`` mode only (255 for uint8, 65535 for uint16, 32767 for int16).
+        mode: Intensity mapping mode (see above).
+        window_center: Center of the intensity window (``"window"`` mode).
+        window_width: Width of the intensity window (``"window"`` mode).
+        percentile_low: Low percentile for clipping (``"percentile"`` mode, default 1.0).
+        percentile_high: High percentile for clipping (``"percentile"`` mode, default 99.0).
+        scale_factor: Multiplicative factor applied to raw pixels before clipping
+            (``"range_scale"`` mode, e.g. 0.4 for thermal Kelvin conversion).
+        min_value: Minimum physical value after scaling, used as clip lower bound
+            (``"range_scale"`` mode, e.g. 295.15 K).
+        repeat_channels: If > 0, repeat single-channel images to this many channels
+            (e.g. 3 for pretrained RGB backbones). 0 = no repeat.
+    """
+
+    storage_dtype: str = "uint8"
+    max_value: float | None = None
+    mode: str = "scale_to_unit"
+    window_center: float | None = None
+    window_width: float | None = None
+    percentile_low: float = 1.0
+    percentile_high: float = 99.0
+    scale_factor: float = 1.0
+    min_value: float = 0.0
+    repeat_channels: int = 0
 
 
 @dataclass
@@ -38,30 +86,31 @@ class SubsetConfig:
     Attributes:
         batch_size (int): Batch size produced by the dataloader.
         subset_name (str): Datumaro Dataset's subset name for this subset config.
-        transforms (list[dict[str, Any]] | Compose): List of transforms to apply.
-            For `TransformLibType.TORCHVISION`, accepts a list of `torchvision.transforms.v2.*` objects
-            or a `torchvision.transforms.v2.Compose` object.
-        transform_lib_type (TransformLibType): Specifies the transform library type used.
+        augmentations_cpu (list[dict[str, Any]]): CPU-stage augmentations using torchvision.transforms.v2.
+            These run in Dataset workers before collate. Must output fixed-size tensors for batching.
+            Examples: Resize, RandomResizedCrop, intensity mapping transforms.
+        augmentations_gpu (list[dict[str, Any]]): GPU-stage augmentations using Kornia.
+            These run after batch transfer to GPU via Lightning Callback.
+            Examples: RandomHorizontalFlip, ColorJiggle, Normalize.
+        intensity (IntensityConfig): Raw-pixel to float32 [0, 1] mapping config; defaults to uint8 ``scale_to_unit``.
         num_workers (int): Number of worker processes for the dataloader.
         sampler (SamplerConfig): Sampler configuration for the dataloader.
-        to_tv_image (bool): Whether to convert images to torch tensors.
         input_size (tuple[int, int] | None): Input size expected by the model.
-            If `$(input_size)` is present in transforms, it will be replaced with this value.
+            If `$(input_size)` is present in augmentations, it will be replaced with this value.
 
     Example:
         ```python
         train_subset_config = SubsetConfig(
             batch_size=64,
             subset_name="train",
-            transforms=v2.Compose(
-                [
-                    v2.RandomResizedCrop(size=(224, 224), antialias=True),
-                    v2.RandomHorizontalFlip(p=0.5),
-                    v2.ToDtype(torch.float32, scale=True),
-                    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-                ],
-            ),
-            transform_lib_type=TransformLibType.TORCHVISION,
+            augmentations_cpu=[
+                {"class_path": "torchvision.transforms.v2.RandomResizedCrop", "init_args": {"size": (224, 224)}},
+            ],
+            augmentations_gpu=[
+                {"class_path": "kornia.augmentation.RandomHorizontalFlip", "init_args": {"p": 0.5}},
+                {"class_path": "kornia.augmentation.Normalize", "init_args": {"mean": [0.485, 0.456, 0.406],
+                                                                              "std": [0.229, 0.224, 0.225]}},
+            ],
             num_workers=2,
         )
         ```
@@ -69,13 +118,20 @@ class SubsetConfig:
 
     batch_size: int = 6
     subset_name: str = "train"
-    transforms: list[dict[str, Any]] = field(default_factory=list)
-    transform_lib_type: TransformLibType = TransformLibType.TORCHVISION
+    augmentations_cpu: list[dict[str, Any]] = field(default_factory=list)
+    augmentations_gpu: list[dict[str, Any]] = field(default_factory=list)
+    intensity: IntensityConfig = field(default_factory=IntensityConfig)
     num_workers: int = 2
     sampler: SamplerConfig = field(default_factory=SamplerConfig)
-    to_tv_image: bool = True
     input_size: tuple[int, int] | None = None
 
+    def __post_init__(self) -> None:
+        """Normalize nested config objects loaded from recipe dictionaries."""
+        if isinstance(self.intensity, dict):
+            self.intensity = IntensityConfig(**self.intensity)
+        if isinstance(self.sampler, dict):
+            self.sampler = SamplerConfig(**self.sampler)
+
 
 @dataclass
 class TileConfig:
diff --git a/library/src/otx/data/augmentation/__init__.py b/library/src/otx/data/augmentation/__init__.py
new file mode 100644
index 00000000000..bc0aea90fdf
--- /dev/null
+++ b/library/src/otx/data/augmentation/__init__.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""OTX Augmentation module.
+
+This module provides a two-stage augmentation pipeline:
+- CPU stage: Size-dependent augmentations in Dataset workers (torchvision.transforms.v2)
+- GPU stage: Batch-level augmentations via Lightning Callback (Kornia)
+- Intensity mapping: High-bit-depth (uint16) → float32 [0,1] conversion
+"""
+
+# The ``kernels`` import below is needed for its side effect: it registers the ImageInfo torchvision kernels.
+from otx.data.augmentation.intensity import (
+    PercentileClip,
+    RangeScale,
+    RepeatChannels,
+    ScaleToUnit,
+    WindowLevel,
+    build_intensity_transform,
+)
+from otx.data.augmentation.pipeline import (
+    CPUAugmentationPipeline,
+    GPUAugmentationPipeline,
+)
+
+from . import kernels
+
+__all__ = [
+    "CPUAugmentationPipeline",
+    "GPUAugmentationPipeline",
+    "PercentileClip",
+    "RangeScale",
+    "RepeatChannels",
+    "ScaleToUnit",
+    "WindowLevel",
+    "build_intensity_transform",
+    "kernels",
+]
diff --git a/library/src/otx/data/augmentation/intensity.py b/library/src/otx/data/augmentation/intensity.py
new file mode 100644
index 00000000000..00c8310055c
--- /dev/null
+++ b/library/src/otx/data/augmentation/intensity.py
@@ -0,0 +1,313 @@
+# Copyright (C) 2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""Intensity mapping transforms for high-bit-depth image support.
+
+Converts raw pixel values (uint8, uint16, int16, etc.) to float32 in [0, 1]
+with domain-specific strategies:
+
+- :class:`ScaleToUnit` — simple divide-by-max (default for uint8 and uint16).
+- :class:`WindowLevel` — CT-style window/level mapping for medical imaging.
+- :class:`PercentileClip` — per-image percentile normalization for microscopy.
+- :class:`RangeScale` — multiply-by-factor + clip to physical range (thermal cameras).
+- :class:`RepeatChannels` — expand single-channel to N-channel for pretrained backbones.
+- :func:`build_intensity_transform` — factory that builds the right pipeline from
+  :class:`~otx.config.data.IntensityConfig`.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+import torch
+from torch import Tensor, nn
+
+if TYPE_CHECKING:
+    from otx.config.data import IntensityConfig
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Auto-detected max values per storage dtype
+# ---------------------------------------------------------------------------
+_AUTO_MAX: dict[str, float] = {
+    "uint8": 255.0,
+    "uint16": 65535.0,
+    "int16": 32767.0,
+    "float32": 1.0,
+}
+
+
+class ScaleToUnit(nn.Module):
+    """Linearly scale raw pixel values to [0, 1].
+
+    ``output = clamp(input.float() / max_value, 0, 1)``
+
+    This is the default intensity mapping for both uint8 (max_value=255) and
+    uint16 (max_value=65535).  It replaces the previously-hardcoded
+    ``torchvision.transforms.v2.functional.to_dtype(…, scale=True)`` with an
+    explicit, dtype-aware alternative.
+
+    Args:
+        max_value: Denominator for the division.  Use 255 for uint8, 65535 for
+            uint16, or any custom value.
+    """
+
+    def __init__(self, max_value: float = 255.0) -> None:
+        super().__init__()
+        self.max_value = max_value
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Scale to [0, 1] float32."""
+        return torch.clamp(x.float() / self.max_value, 0.0, 1.0)
+
+    def extra_repr(self) -> str:
+        """Return extra string representation."""
+        return f"max_value={self.max_value}"
+
+
+class WindowLevel(nn.Module):
+    """Window / level intensity mapping for CT-style medical imaging.
+
+    Maps the raw intensity window ``[center - width/2, center + width/2]``
+    linearly onto ``[0, 1]``.  Values outside the window are clamped.
+
+    Args:
+        center: Centre of the intensity window (in raw pixel units).
+        width: Width of the intensity window (in raw pixel units).
+    """
+
+    def __init__(self, center: float, width: float) -> None:
+        super().__init__()
+        self.center = center
+        self.width = width
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Apply window/level mapping."""
+        low = self.center - self.width / 2.0
+        high = self.center + self.width / 2.0
+        out = (x.float() - low) / (high - low)
+        return torch.clamp(out, 0.0, 1.0)
+
+    def extra_repr(self) -> str:
+        """Return extra string representation."""
+        return f"center={self.center}, width={self.width}"
+
+
+class PercentileClip(nn.Module):
+    """Per-image percentile-based intensity normalization.
+
+    For each image independently:
+    1. Compute the ``low`` and ``high`` percentile values.
+    2. Clip to ``[p_low, p_high]``.
+    3. Normalize to ``[0, 1]``.
+
+    Useful for microscopy, pathology, or any domain where the dynamic range
+    varies strongly between images.
+
+    Args:
+        low: Lower percentile (0-100).  Default 1.0.
+        high: Upper percentile (0-100).  Default 99.0.
+    """
+
+    def __init__(self, low: float = 1.0, high: float = 99.0) -> None:
+        super().__init__()
+        if not 0.0 <= low < high <= 100.0:
+            msg = f"Percentiles must satisfy 0 <= low < high <= 100, got low={low}, high={high}"
+            raise ValueError(msg)
+        self.low = low
+        self.high = high
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Per-image percentile normalization."""
+        x_float = x.float()
+        # Flatten everything (channels included): one low/high pair is computed over the whole tensor
+        flat = x_float.reshape(-1)
+        p_low = torch.quantile(flat, self.low / 100.0)
+        p_high = torch.quantile(flat, self.high / 100.0)
+        # Avoid division by zero when the image is constant
+        denom = p_high - p_low
+        if denom < 1e-8:
+            return torch.zeros_like(x_float)
+        out = (x_float - p_low) / denom
+        return torch.clamp(out, 0.0, 1.0)
+
+    def extra_repr(self) -> str:
+        """Return extra string representation."""
+        return f"low={self.low}, high={self.high}"
+
+
+class RangeScale(nn.Module):
+    """Multiply-then-clip intensity mapping for thermal / physical-range data.
+
+    Reproduces the thermal pipeline from ``process_raw_thermal.py``::
+
+        scaled = raw_pixels * scale_factor
+        clipped = clip(scaled, min_value, max_value)
+        normalized = (clipped - min_value) / (max_value - min_value)
+
+    For a FLIR A65 camera with Kelvin conversion:
+    ``RangeScale(scale_factor=0.4, min_value=295.15, max_value=360.15)``
+
+    Args:
+        scale_factor: Multiplicative factor applied to raw pixel values.
+        min_value: Lower clip bound (physical units after scaling).
+        max_value: Upper clip bound (physical units after scaling).
+    """
+
+    def __init__(self, scale_factor: float = 1.0, min_value: float = 0.0, max_value: float = 1.0) -> None:
+        super().__init__()
+        if max_value <= min_value:
+            msg = f"max_value must be > min_value, got min_value={min_value}, max_value={max_value}"
+            raise ValueError(msg)
+        self.scale_factor = scale_factor
+        self.min_value = min_value
+        self.max_value = max_value
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Apply scale, clip, normalize."""
+        scaled = x.float() * self.scale_factor
+        clipped = torch.clamp(scaled, self.min_value, self.max_value)
+        return (clipped - self.min_value) / (self.max_value - self.min_value)
+
+    def extra_repr(self) -> str:
+        """Return extra string representation."""
+        return f"scale_factor={self.scale_factor}, min_value={self.min_value}, max_value={self.max_value}"
+
+
+class RepeatChannels(nn.Module):
+    """Repeat single-channel images to N channels.
+
+    Many pretrained backbones expect 3-channel (RGB) input.  Medical and thermal
+    images are often single-channel (grayscale).  This transform repeats the
+    channel dimension so the data is compatible.
+
+    Operates on tensors of shape ``(C, H, W)`` (per-sample) or
+    ``(B, C, H, W)`` (batched).  Only repeats when ``C == 1``; otherwise
+    passes through unchanged.
+
+    Args:
+        num_channels: Target number of channels.  Default 3.
+    """
+
+    def __init__(self, num_channels: int = 3) -> None:
+        super().__init__()
+        self.num_channels = num_channels
+
+    def forward(self, x: Tensor) -> Tensor:
+        """Repeat channel dim if C == 1."""
+        if x.ndim == 3 and x.shape[0] == 1:
+            return x.repeat(self.num_channels, 1, 1)
+        if x.ndim == 4 and x.shape[1] == 1:
+            return x.repeat(1, self.num_channels, 1, 1)
+        return x
+
+    def extra_repr(self) -> str:
+        """Return extra string representation."""
+        return f"num_channels={self.num_channels}"
+
+
+# ---------------------------------------------------------------------------
+# Factory
+# ---------------------------------------------------------------------------
+
+
+def build_intensity_transform(config: IntensityConfig) -> nn.Sequential:
+    """Build an intensity-mapping transform pipeline from :class:`IntensityConfig`.
+
+    The returned ``nn.Sequential`` converts a raw-dtype image tensor to
+    ``float32`` in ``[0, 1]`` and optionally repeats channels.
+
+    For ``storage_dtype="uint8"`` with ``mode="scale_to_unit"`` (the default),
+    this produces a :class:`ScaleToUnit(255)` which is functionally equivalent
+    to the old ``torchvision.transforms.v2.functional.to_dtype(…, scale=True)``.
+
+    Args:
+        config: An :class:`~otx.config.data.IntensityConfig` instance.
+
+    Returns:
+        ``nn.Sequential`` of intensity transforms ready to prepend to the
+        CPU augmentation pipeline.
+
+    Raises:
+        ValueError: If the ``mode`` is unknown or required fields are missing.
+
+    Examples:
+        Standard uint8 (behaves like the previous hardcoded scaling)::
+
+            cfg = IntensityConfig()  # defaults
+            t = build_intensity_transform(cfg)  # ScaleToUnit(255)
+
+        Thermal (FLIR A65)::
+
+            cfg = IntensityConfig(
+                storage_dtype="uint16",
+                mode="range_scale",
+                scale_factor=0.4,
+                min_value=295.15,
+                max_value=360.15,
+                repeat_channels=3,
+            )
+            t = build_intensity_transform(cfg)
+
+        Medical CT::
+
+            cfg = IntensityConfig(
+                storage_dtype="uint16",
+                mode="window",
+                window_center=40.0,
+                window_width=400.0,
+                repeat_channels=3,
+            )
+            t = build_intensity_transform(cfg)
+    """
+    mode = config.mode
+    storage_dtype = config.storage_dtype
+    transforms: list[nn.Module] = []
+
+    if mode == "scale_to_unit":
+        max_value = config.max_value
+        if max_value is None:
+            max_value = _AUTO_MAX.get(storage_dtype)
+            if max_value is None:
+                msg = (
+                    f"Cannot auto-detect max_value for storage_dtype={storage_dtype!r}. "
+                    "Please set IntensityConfig.max_value explicitly."
+                )
+                raise ValueError(msg)
+        transforms.append(ScaleToUnit(max_value=max_value))
+
+    elif mode == "window":
+        if config.window_center is None or config.window_width is None:
+            msg = "IntensityConfig mode='window' requires both window_center and window_width."
+            raise ValueError(msg)
+        transforms.append(WindowLevel(center=config.window_center, width=config.window_width))
+
+    elif mode == "percentile":
+        transforms.append(PercentileClip(low=config.percentile_low, high=config.percentile_high))
+
+    elif mode == "range_scale":
+        if config.max_value is None:
+            msg = "IntensityConfig mode='range_scale' requires max_value to be set."
+            raise ValueError(msg)
+        transforms.append(
+            RangeScale(
+                scale_factor=config.scale_factor,
+                min_value=config.min_value,
+                max_value=config.max_value,
+            )
+        )
+    else:
+        msg = (
+            f"Unknown IntensityConfig mode: {mode!r}. "
+            "Supported: 'scale_to_unit', 'window', 'percentile', 'range_scale'."
+        )
+        raise ValueError(msg)
+
+    # Optional channel repetition (e.g. grayscale → 3ch for pretrained backbones)
+    if config.repeat_channels > 0:
+        transforms.append(RepeatChannels(num_channels=config.repeat_channels))
+
+    logger.info("Built intensity transform pipeline: %s", transforms)
+    return nn.Sequential(*transforms)
diff --git a/library/src/otx/data/augmentation/kernels.py b/library/src/otx/data/augmentation/kernels.py
new file mode 100644
index 00000000000..4cc891613db
--- /dev/null
+++ b/library/src/otx/data/augmentation/kernels.py
@@ -0,0 +1,90 @@
+# Copyright (C) 2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""Torchvision kernel registrations for OTX data entities.
+
+This module registers torchvision v2 functional kernels for OTX-specific
+tensor types (``ImageInfo``), enabling these types to be processed by
+standard torchvision transforms.
+"""
+
+from __future__ import annotations
+
+import torchvision.transforms.v2.functional as F  # noqa: N812
+
+from otx.data.entity.base import ImageInfo
+
+
+@F.register_kernel(functional=F.resize, tv_tensor_cls=ImageInfo)
+def _resize_image_info(image_info: ImageInfo, size: list[int], **kwargs) -> ImageInfo:  # noqa: ARG001
+    """Register ImageInfo to TorchVision v2 resize kernel."""
+    if len(size) == 2:
+        image_info.img_shape = (size[0], size[1])
+    elif len(size) == 1:
+        image_info.img_shape = (size[0], size[0])
+    else:
+        raise ValueError(size)
+
+    ori_h, ori_w = image_info.ori_shape
+    new_h, new_w = image_info.img_shape
+    image_info.scale_factor = (new_h / ori_h, new_w / ori_w)
+    return image_info
+
+
+@F.register_kernel(functional=F.crop, tv_tensor_cls=ImageInfo)
+def _crop_image_info(
+    image_info: ImageInfo,
+    height: int,
+    width: int,
+    **kwargs,  # noqa: ARG001
+) -> ImageInfo:
+    """Register ImageInfo to TorchVision v2 crop kernel."""
+    image_info.img_shape = (height, width)
+    image_info.scale_factor = None
+    return image_info
+
+
+@F.register_kernel(functional=F.resized_crop, tv_tensor_cls=ImageInfo)
+def _resized_crop_image_info(
+    image_info: ImageInfo,
+    size: list[int],
+    **kwargs,  # noqa: ARG001
+) -> ImageInfo:
+    """Register ImageInfo to TorchVision v2 resized_crop kernel."""
+    if len(size) == 2:
+        image_info.img_shape = (size[0], size[1])
+    elif len(size) == 1:
+        image_info.img_shape = (size[0], size[0])
+    else:
+        raise ValueError(size)
+
+    image_info.scale_factor = None
+    return image_info
+
+
+@F.register_kernel(functional=F.center_crop, tv_tensor_cls=ImageInfo)
+def _center_crop_image_info(
+    image_info: ImageInfo,
+    output_size: list[int],
+    **kwargs,  # noqa: ARG001
+) -> ImageInfo:
+    """Register ImageInfo to TorchVision v2 center_crop kernel."""
+    img_shape = F._geometry._center_crop_parse_output_size(output_size)  # noqa: SLF001
+    image_info.img_shape = (img_shape[0], img_shape[1])
+
+    image_info.scale_factor = None
+    return image_info
+
+
+@F.register_kernel(functional=F.pad, tv_tensor_cls=ImageInfo)
+def _pad_image_info(
+    image_info: ImageInfo,
+    padding: int | list[int],
+    **kwargs,  # noqa: ARG001
+) -> ImageInfo:
+    """Register ImageInfo to TorchVision v2 pad kernel."""
+    left, right, top, bottom = F._geometry._parse_pad_padding(padding)  # noqa: SLF001
+    height, width = image_info.img_shape
+    image_info.padding = (left, top, right, bottom)
+    image_info.img_shape = (height + top + bottom, width + left + right)
+    return image_info
diff --git a/library/src/otx/data/augmentation/pipeline.py b/library/src/otx/data/augmentation/pipeline.py
new file mode 100644
index 00000000000..c7fa79e5a57
--- /dev/null
+++ b/library/src/otx/data/augmentation/pipeline.py
@@ -0,0 +1,818 @@
+# Copyright (C) 2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""CPU and GPU augmentation pipelines for OTX.
+
+This module provides the core augmentation pipeline classes:
+- CPUAugmentationPipeline: Runs in Dataset workers using torchvision.transforms.v2
+- GPUAugmentationPipeline: Runs on GPU using Kornia (to be implemented)
+"""
+
+from __future__ import annotations
+
+import ast
+import operator
+import typing
+from copy import copy, deepcopy
+from inspect import isclass
+from typing import TYPE_CHECKING, Any, Callable
+
+import kornia.augmentation as K  # noqa: N812
+import torch
+import torchvision.transforms.v2 as tvt_v2
+import typeguard
+from kornia.augmentation.container import ops
+from lightning.pytorch.cli import instantiate_class
+from omegaconf import DictConfig
+from torch import nn
+
+from otx.config.data import IntensityConfig
+from otx.data.augmentation.intensity import build_intensity_transform
+from otx.data.entity.sample import OTXSample
+from otx.data.utils import import_object_from_module
+
+if TYPE_CHECKING:
+    from otx.config.data import SubsetConfig
+
+
+_KORNIA_PATCHED = False
+_original_transform_list = ops.MaskSequentialOps.transform_list
+
+
+def _fixed_transform_list(cls, input, module, param, extra_args=None):  # noqa: ANN001, ANN202, A002
+    """Fixed version that slices transform_matrix for each list element."""
+    if extra_args is None:
+        extra_args = {}
+    if isinstance(module, (K.GeometricAugmentationBase2D,)):
+        tfm_input = []
+        params = cls.get_instance_module_param(param)
+        params_i = deepcopy(params)
+        for i, inp in enumerate(input):
+            params_i["batch_prob"] = params["batch_prob"][i : i + 1]
+            transform_i = module.transform_matrix[i : i + 1] if module.transform_matrix is not None else None
+            tfm_inp = module.transform_masks(
+                inp, params=params_i, flags=module.flags, transform=transform_i, **extra_args
+            )
+            tfm_input.append(tfm_inp)
+        return tfm_input
+    return _original_transform_list.__func__(cls, input, module, param, extra_args)  # type: ignore[attr-defined]
+
+
+def _ensure_kornia_patched() -> None:
+    """Apply the Kornia MaskSequentialOps monkey-patch on first use."""
+    global _KORNIA_PATCHED  # noqa: PLW0603
+    if _KORNIA_PATCHED:
+        return
+    ops.MaskSequentialOps.transform_list = classmethod(_fixed_transform_list)  # type: ignore[assignment]
+    _KORNIA_PATCHED = True
+
+
+# Mapping from storage_dtype string to bit depth.
+_DTYPE_TO_BIT_DEPTH: dict[str, int] = {
+    "uint8": 8,
+    "uint16": 16,
+    "int16": 16,
+    "float32": 32,
+}
+
+
+def _eval_input_size_str(str_to_eval: str) -> tuple[int, ...] | int:
+    """Safely evaluate an arithmetic expression involving ``$(input_size)``.
+
+    Only multiplication, division and unary ``+``/``-`` are supported.  Operands
+    may be constants or tuples.  The result is rounded to ``int``.
+
+    Args:
+        str_to_eval: String expression to evaluate.
+
+    Returns:
+        Evaluated result as int or tuple of ints.
+    """
+    bin_ops: dict[type, Callable[[Any, Any], Any]] = {
+        ast.Mult: operator.mul,
+        ast.Div: operator.truediv,
+    }
+    un_ops: dict[type, Callable[[Any], Any]] = {
+        ast.USub: operator.neg,
+        ast.UAdd: operator.pos,
+    }
+    available_ops = tuple(bin_ops) + tuple(un_ops) + (ast.BinOp, ast.UnaryOp)
+
+    tree = ast.parse(str_to_eval, mode="eval")
+
+    def _eval(node: Any) -> Any:  # noqa: ANN401
+        if isinstance(node, ast.Expression):
+            return _eval(node.body)
+        if isinstance(node, ast.Constant):
+            return node.value
+        if isinstance(node, ast.Tuple):
+            return torch.tensor([_eval(val) for val in node.elts])
+        if isinstance(node, ast.BinOp) and type(node.op) in bin_ops:
+            left = _eval(node.left)
+            right = _eval(node.right)
+            return bin_ops[type(node.op)](left, right)
+        if isinstance(node, ast.UnaryOp) and type(node.op) in un_ops:
+            operand = _eval(node.operand) if isinstance(node.operand, available_ops) else node.operand.value
+            return un_ops[type(node.op)](operand)
+        msg = f"Bad syntax, {type(node)}. Available operations for calculating input size are {available_ops}"
+        raise SyntaxError(msg)
+
+    ret = _eval(tree)
+    if isinstance(ret, torch.Tensor):
+        return tuple(ret.round().int().tolist())
+    return round(ret)
+
+
+def _configure_input_size(
+    cfg: dict[str, Any],
+    input_size: int | tuple[int, int] | None,
+) -> dict[str, Any]:
+    """Replace ``$(input_size)`` placeholders in augmentation config ``init_args``.
+
+    Input size should be specified as ``$(input_size)``
+    (e.g. ``$(input_size) * 0.5``).  Only simple multiplication or division
+    evaluation is supported.  The function decides whether to pass a ``tuple``
+    or ``int`` based on the type-hint of the target argument.  Floating-point
+    values are rounded to ``int``.
+
+    Args:
+        cfg: Augmentation config dict with ``class_path`` and ``init_args``.
+        input_size: Target input size ``(H, W)`` or single ``int``.
+
+    Returns:
+        Config with placeholders replaced by actual values.
+    """
+    init_args = cfg.get("init_args", {})
+    if not init_args:
+        return cfg
+
+    _input_size: tuple[int, int] | None = None
+    if input_size is not None:
+        _input_size = (input_size, input_size) if isinstance(input_size, int) else tuple(input_size)  # type: ignore[assignment]
+
+    def check_type(value: Any, expected_type: Any) -> bool:  # noqa: ANN401
+        try:
+            typeguard.check_type(value, expected_type)
+        except typeguard.TypeCheckError:
+            return False
+        return True
+
+    model_cls = None
+    for key, val in init_args.items():
+        if not (isinstance(val, str) and "$(input_size)" in val):
+            continue
+
+        if input_size is None:
+            msg = (
+                f"{cfg['class_path'].split('.')[-1]} initial argument has `$(input_size)`, "
+                "but input_size is set to None."
+            )
+            raise RuntimeError(msg)
+
+        if model_cls is None:
+            model_cls = import_object_from_module(cfg["class_path"])
+
+        available_types = typing.get_type_hints(model_cls.__init__).get(key)
+
+        if available_types is None or check_type(_input_size, available_types):
+            # Pass tuple[int, int]
+            init_args[key] = _eval_input_size_str(val.replace("$(input_size)", str(_input_size)))
+        elif check_type(_input_size[0], available_types):  # type: ignore[index]
+            # Pass int
+            init_args[key] = _eval_input_size_str(val.replace("$(input_size)", str(_input_size[0])))  # type: ignore[index]
+        else:
+            msg = f"{key} argument should be able to get int or tuple[int, int], but it can get {available_types}"
+            raise RuntimeError(msg)
+
+    return cfg
+
+
+class _IntensityAdapter(nn.Module):
+    """Wrap an intensity transform and stamp ``img_info.bit_depth``.
+
+    Unlike :class:`_SampleImageAdapter`, this also records the original
+    bit-depth of the image (derived from ``storage_dtype``) on the sample's
+    :class:`~otx.data.entity.base.ImageInfo`.  Downstream code (e.g. YOLOX)
+    can use ``img_info.bit_depth`` to reject unsupported high-bit-depth inputs.
+    """
+
+    def __init__(self, transform: nn.Module, storage_dtype: str = "uint8") -> None:
+        super().__init__()
+        self.transform = transform
+        self.bit_depth = _DTYPE_TO_BIT_DEPTH.get(storage_dtype, 8)
+
+    def forward(self, sample: OTXSample) -> OTXSample:
+        """Apply intensity transform and set ``img_info.bit_depth``."""
+        sample.image = self.transform(sample.image)
+        if hasattr(sample, "img_info") and sample.img_info is not None:
+            sample.img_info.bit_depth = self.bit_depth
+        return sample
+
+
+class CPUAugmentationPipeline(nn.Module):
+    """CPU-stage augmentation pipeline using torchvision.transforms.v2.
+
+    This pipeline runs in Dataset workers (before collate) and handles:
+    - Intensity mapping (uint16 → float32 for medical images)
+    - Size-dependent geometric augmentations (Resize, RandomResizedCrop)
+    - Augmentations applied to image, bboxes, masks, keypoints, etc.
+
+    All outputs are fixed-size tensors suitable for batch stacking.
+
+    The pipeline supports two types of transforms:
+    1. OTX-style transforms: Have a `forward(*OTXDataItem)` signature and handle
+       all data types internally (image, bboxes, masks, etc.)
+    2. Native torchvision.v2 transforms: Applied to all tv_tensors extracted
+       from the OTXDataItem (image, bboxes, masks, keypoints)
+
+    Args:
+        augmentations: List of torchvision.transforms.v2 transforms or OTX transforms.
+
+    Example:
+        >>> pipeline = CPUAugmentationPipeline([
+        ...     v2.RandomResizedCrop(size=(224, 224)),
+        ...     v2.RandomHorizontalFlip(p=0.5),
+        ...     v2.ToDtype(torch.float32, scale=True),
+        ... ])
+        >>> item = pipeline(item)  # OTXDataItem with fixed-size image
+    """
+
+    def __init__(self, augmentations: list[nn.Module] | None = None) -> None:
+        super().__init__()
+        self.augmentations = nn.ModuleList(augmentations or [])
+        self._mean, self._std = self._extract_normalization_params(list(self.augmentations))
+
+    @staticmethod
+    def _extract_normalization_params(
+        augmentations: list[nn.Module],
+    ) -> tuple[tuple[float, float, float] | None, tuple[float, float, float] | None]:
+        """Extract mean and std from the first torchvision Normalize transform found.
+
+        Args:
+            augmentations: List of augmentation modules.
+
+        Returns:
+            Tuple of (mean, std) extracted from first Normalize found, or (None, None).
+        """
+        for transform in augmentations:
+            if isinstance(transform, tvt_v2.Normalize):
+                mean: tuple[float, float, float] = tuple(float(v) for v in transform.mean)  # type: ignore[assignment]
+                std: tuple[float, float, float] = tuple(float(v) for v in transform.std)  # type: ignore[assignment]
+                return mean, std
+        return None, None
+
+    @property
+    def mean(self) -> tuple[float, float, float] | None:
+        """Get normalization mean."""
+        return self._mean  # cached in __init__; None when no Normalize transform was found
+
+    @property
+    def std(self) -> tuple[float, float, float] | None:
+        """Get normalization std."""
+        return self._std  # cached in __init__; None when no Normalize transform was found
+
+    @classmethod
+    def list_available_transforms(cls) -> list[type[tvt_v2.Transform]]:
+        """List available TorchVision transform (only V2) classes."""
+        return [
+            obj
+            for name in dir(tvt_v2)
+            if (obj := getattr(tvt_v2, name)) and isclass(obj) and issubclass(obj, tvt_v2.Transform)
+        ]
+
+    @classmethod
+    def from_config(cls, config: SubsetConfig) -> CPUAugmentationPipeline:
+        """Build CPU augmentation pipeline from SubsetConfig.
+
+        This function handles:
+        - Intensity mapping (prepended automatically from ``IntensityConfig``)
+        - New ``augmentations_cpu`` field (preferred)
+        - Input size placeholder replacement ``$(input_size)``
+
+        The intensity transform is always the **first** operation in the
+        pipeline.  For uint8 with ``mode="scale_to_unit"`` this is equivalent
+        to the old ``to_dtype(float32, scale=True)``; for uint16 / thermal /
+        medical inputs it applies the correct domain-specific mapping.
+
+        Args:
+            config: SubsetConfig with augmentations_cpu and intensity.
+
+        Returns:
+            CPUAugmentationPipeline ready for use in Dataset.
+        """
+        input_size = getattr(config, "input_size", None)
+        aug_configs = config.augmentations_cpu
+        intensity_config = getattr(config, "intensity", None)
+        if isinstance(intensity_config, dict):
+            intensity_config = IntensityConfig(**intensity_config)
+
+        augmentations: list[nn.Module] = []
+
+        # --- 1. Prepend intensity mapping transform ---------------------------
+        if intensity_config is not None:
+            intensity_transform = build_intensity_transform(intensity_config)
+            augmentations.append(_IntensityAdapter(intensity_transform, intensity_config.storage_dtype))
+
+        # --- 2. User-configured augmentations ---------------------------------
+        if aug_configs:
+            for aug_config in aug_configs:
+                cfg = copy(aug_config)
+                if isinstance(cfg, (dict, DictConfig)):
+                    # Handle input_size placeholder
+                    cfg = _configure_input_size(dict(cfg), input_size)
+
+                    # Instantiate the transform
+                    transform = cls._dispatch_transform(cfg)
+                elif isinstance(cfg, nn.Module):
+                    transform = cfg
+                else:
+                    msg = f"Unsupported augmentation config type: {type(cfg)}"
+                    raise TypeError(msg)
+
+                augmentations.append(transform)
+
+        return cls(augmentations)
+
+    @classmethod
+    def _dispatch_transform(cls, cfg_transform: DictConfig | dict | nn.Module) -> nn.Module:
+        """Dispatch and instantiate a transform from config or return as-is.
+
+        Args:
+            cfg_transform: Transform config dict or already instantiated transform.
+
+        Returns:
+            Instantiated transform.
+        """
+        if isinstance(cfg_transform, (DictConfig, dict)):
+            return instantiate_class(args=(), init=dict(cfg_transform))
+        if isinstance(cfg_transform, nn.Module):
+            # Already instantiated transform, return as-is
+            return cfg_transform
+
+        msg = f"CPUAugmentationPipeline accepts only DictConfig | dict | nn.Module, got {type(cfg_transform)}."
+        raise TypeError(msg)
+
+    def _is_native_torchvision_transform(self, transform: nn.Module) -> bool:
+        """Return True if the transform should be applied via ``_apply_native_transform``.
+
+        Rules:
+        - Pure torchvision transforms (module starts with ``torchvision.``) → native.
+        - OTX subclasses of ``tvt_v2.Transform`` that define their own ``forward()``
+          (e.g. ``Resize``, ``CachedMosaic``) handle ``OTXSample`` themselves → NOT native.
+        - OTX wrappers that only add ``__call__`` probability gating without a custom
+          ``forward()`` (e.g. ``RandomIoUCrop``) delegate to the parent torchvision
+          ``forward()`` and must go through ``_apply_native_transform`` → native.
+        """
+        module = type(transform).__module__
+        if module.startswith("torchvision."):
+            return True
+        # OTX class that is a tvt_v2.Transform subclass: treat as native only when it
+        # does NOT define its own forward() (i.e. it relies on the parent's forward).
+        if isinstance(transform, tvt_v2.Transform):
+            return "forward" not in type(transform).__dict__
+        return False
+
+    def _apply_native_transform(self, transform: nn.Module, inputs: OTXSample) -> OTXSample:  # type: ignore[return-value]
+        """Apply native torchvision transform only to image-related fields.
+
+        TorchVision v2 expects standard field names like `boxes`/`labels`; we
+        map to those before calling the transform and map back afterward.
+        We also keep `img_info` in sync when the image size changes.
+        """
+        # Build a dict of transformable fields with torchvision-friendly keys.
+        transformable: dict[str, Any] = {}
+        if (image := getattr(inputs, "image", None)) is not None:
+            transformable["image"] = image
+        if (img_info := getattr(inputs, "img_info", None)) is not None:
+            transformable["img_info"] = img_info
+        if (masks := getattr(inputs, "masks", None)) is not None:
+            transformable["masks"] = masks
+        if (bboxes := getattr(inputs, "bboxes", None)) is not None:
+            transformable["boxes"] = bboxes
+        if (label := getattr(inputs, "label", None)) is not None:
+            transformable["labels"] = label
+
+        if not transformable:
+            return inputs
+
+        # Apply transform to transformable fields
+        # If there's only an image, pass it directly; otherwise pass as dict
+        if len(transformable) == 1 and "image" in transformable:
+            result = transform(transformable["image"])
+            inputs.image = result
+        else:
+            # Reverse mapping: torchvision key → OTXSample attribute name
+            tv_to_otx = {"boxes": "bboxes", "labels": "label"}
+
+            result = transform(transformable)
+            if isinstance(result, dict):
+                for key, value in result.items():
+                    attr = tv_to_otx.get(key, key)
+                    setattr(inputs, attr, value)
+            else:
+                # Single result, assume it's the image
+                inputs.image = result
+        return inputs
+
+    def forward(self, *inputs: OTXSample) -> OTXSample | None:
+        """Forward with skipping None."""
+        needs_unpacking = len(inputs) > 1
+        outputs: OTXSample | None = inputs[0]  # type: ignore[assignment]
+        for transform in self.augmentations:
+            if self._is_native_torchvision_transform(transform):
+                # Apply native transforms only to image-related fields
+                outputs = self._apply_native_transform(transform, inputs[0])
+            else:
+                outputs = transform(*inputs)
+            if outputs is None:
+                return outputs
+            inputs = outputs if needs_unpacking else (outputs,)  # type: ignore[assignment]
+        return outputs
+
+    def __repr__(self) -> str:
+        """String representation of the pipeline."""
+        aug_strs = [f"  {aug}" for aug in self.augmentations]
+        return "CPUAugmentationPipeline(\n" + "\n".join(aug_strs) + "\n)"
+
+
+class GPUAugmentationPipeline(nn.Module):
+    """GPU-stage augmentation pipeline using Kornia AugmentationSequential.
+
+    This pipeline runs on GPU after batch transfer via Lightning Callback.
+    It uses Kornia's AugmentationSequential for efficient batch-level processing
+    with support for multiple data types (images, bboxes, masks, keypoints).
+
+    Key features:
+    - Uses Kornia AugmentationSequential for optimized batch processing
+    - Supports data_keys for transforming bboxes, masks, keypoints along with images
+    - Extracts normalization parameters for model export
+
+    The pipeline expects batched tensors in BCHW format with values in [0, 1].
+
+    Args:
+        augmentations: List of Kornia augmentation modules.
+        data_keys: List of data keys to transform (e.g., ["input", "bbox_xyxy", "mask"]).
+            Defaults to ["input"] for image-only augmentation.
+
+    Example:
+        >>> import kornia.augmentation as K
+        >>> pipeline = GPUAugmentationPipeline(
+        ...     augmentations=[
+        ...         K.RandomHorizontalFlip(p=0.5),
+        ...         K.ColorJiggle(0.1, 0.1, 0.1, 0.1),
+        ...         K.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ...     ],
+        ...     data_keys=["input"],
+        ... )
+        >>> augmented_images = pipeline(batch_images)
+    """
+
+    def __init__(
+        self,
+        augmentations: list[nn.Module] | None = None,
+        data_keys: list[str] | None = None,
+    ) -> None:
+        super().__init__()
+        self._augmentations_list = augmentations or []  # None -> empty list
+        self._data_keys = data_keys or ["input"]  # None/empty -> image-only
+        self._mean, self._std = self._extract_normalization_params(self._augmentations_list)
+
+        # Stays None for an empty augmentation list; forward() then returns its inputs unchanged.
+        self.aug_sequential: K.AugmentationSequential | None = None
+        if self._augmentations_list:
+            _ensure_kornia_patched()  # NOTE(review): presumably patches Kornia before first use - confirm
+            # Cast to Any because Kornia stubs restrict to _AugmentationBase but
+            # any nn.Module works at runtime.
+            _augs: Any = self._augmentations_list
+            self.aug_sequential = K.AugmentationSequential(
+                *_augs,
+                data_keys=self._data_keys,
+                same_on_batch=False,
+            )
+
+    @staticmethod
+    def _extract_normalization_params(
+        augmentations: list[nn.Module],
+    ) -> tuple[tuple[float, float, float] | None, tuple[float, float, float] | None]:
+        """Extract mean and std from Normalize augmentation in the list.
+
+        Args:
+            augmentations: List of augmentation modules.
+
+        Returns:
+            Tuple of (mean, std) extracted from first Normalize found, or (None, None).
+        """
+        mean: tuple[float, float, float] | None = None
+        std: tuple[float, float, float] | None = None
+
+        for aug in augmentations:
+            # Check if this is a Normalize augmentation (Kornia stores in flags dict)
+            if isinstance(aug, K.Normalize):
+                mean = typing.cast("tuple[float, float, float]", tuple(aug.flags["mean"].tolist()))
+                std = typing.cast("tuple[float, float, float]", tuple(aug.flags["std"].tolist()))
+                # Stop after finding the first Normalize
+                break
+
+        return mean, std
+
+    @property
+    def mean(self) -> tuple[float, float, float] | None:
+        """Get normalization mean."""
+        return self._mean  # cached in __init__; None when no K.Normalize augmentation was found
+
+    @property
+    def std(self) -> tuple[float, float, float] | None:
+        """Get normalization std."""
+        return self._std  # cached in __init__; None when no K.Normalize augmentation was found
+
+    @property
+    def data_keys(self) -> list[str]:
+        """Get data keys used by the pipeline."""
+        return self._data_keys  # the list stored in __init__ (["input"] by default), not a copy
+
+    @classmethod
+    def list_available_transforms(cls) -> list[type]:
+        """List available Kornia augmentation classes."""
+        return [
+            obj
+            for name in dir(K)
+            if (obj := getattr(K, name))
+            and isclass(obj)
+            and issubclass(obj, (K.AugmentationBase2D, K.IntensityAugmentationBase2D))
+        ]
+
+    @classmethod
+    def from_config(
+        cls,
+        config: SubsetConfig,
+        data_keys: list[str] | None = None,
+    ) -> GPUAugmentationPipeline:
+        """Build GPU augmentation pipeline from SubsetConfig.
+
+        This function handles:
+        - `augmentations_gpu` field with Kornia augmentations
+        - Extraction of normalization parameters for model update
+        - Input size placeholder replacement $(input_size)
+        - data_keys for Kornia AugmentationSequential
+
+        Args:
+            config: SubsetConfig with augmentations_gpu.
+            data_keys: List of data keys for AugmentationSequential.
+                Defaults to ["input"] for image-only augmentation.
+
+        Returns:
+            GPUAugmentationPipeline ready for use in Callback.
+        """
+        input_size = getattr(config, "input_size", None)
+        aug_configs = config.augmentations_gpu
+
+        if not aug_configs:
+            return cls([], data_keys=data_keys)
+
+        augmentations = []
+
+        for aug_config in aug_configs:
+            cfg = copy(aug_config)
+            if isinstance(cfg, (dict, DictConfig)):
+                # Handle input_size placeholder
+                cfg = _configure_input_size(dict(cfg), input_size)
+
+                # Instantiate the transform
+                transform = cls._dispatch_transform(cfg)
+            elif isinstance(cfg, nn.Module):
+                transform = cfg
+            else:
+                msg = f"Unsupported augmentation config type: {type(cfg)}"
+                raise TypeError(msg)
+
+            augmentations.append(transform)
+
+        return cls(augmentations, data_keys=data_keys)
+
+    @classmethod
+    def _dispatch_transform(cls, cfg_transform: DictConfig | dict | nn.Module) -> nn.Module:
+        """Dispatch and instantiate a transform from config or return as-is.
+
+        Args:
+            cfg_transform: Transform config dict or already instantiated transform.
+
+        Returns:
+            Instantiated transform.
+        """
+        if isinstance(cfg_transform, (DictConfig, dict)):
+            return instantiate_class(args=(), init=dict(cfg_transform))
+        if isinstance(cfg_transform, nn.Module):
+            # Already instantiated transform, return as-is
+            return cfg_transform
+        msg = f"GPUAugmentationPipeline accepts only DictConfig | dict | nn.Module, got {type(cfg_transform)}."
+        raise TypeError(msg)
+
+    def forward(
+        self,
+        images: torch.Tensor,
+        labels: list[torch.Tensor] | None = None,
+        bboxes: list[torch.Tensor] | None = None,
+        masks: list[torch.Tensor] | None = None,
+        keypoints: list[torch.Tensor] | None = None,
+    ) -> dict[str, Any]:
+        """Apply GPU augmentations to batched data using Kornia AugmentationSequential.
+
+        Args:
+            images: Batched images tensor in BCHW format, values in [0, 1].
+            labels: List of labels per image (optional).
+            bboxes: List of bounding boxes per image (optional).
+            masks: List of masks per image (optional). Can be:
+                - Semantic segmentation: (C, H, W) format
+                - Instance segmentation: (N_instances, H, W) format
+            keypoints: List of keypoints per image (optional).
+
+        Returns:
+            Dict with augmented data:
+            {"images": tensor, "labels": list, "bboxes": list, "masks": list, "keypoints": list}
+        """
+        if self.aug_sequential is None:
+            return {"images": images, "labels": labels, "bboxes": bboxes, "masks": masks, "keypoints": keypoints}
+
+        # Each per-sample mask tensor gets one extra LEADING dim before it is handed to
+        # Kornia, e.g. instance masks (N_instances, H, W) become (1, N_instances, H, W).
+        # The same dim is squeezed away again when the results are parsed below.
+        if masks is not None and "mask" in self._data_keys:
+            # Instance seg masks: (N_instances, H, W) - 3D per sample
+            # Semantic seg masks: (C, H, W) where C is often 1 or num_classes
+            # ``unsqueeze(0)`` prepends the new dim; it does not insert a channel dim at index 1.
+            masks = [m.unsqueeze(0) for m in masks]  # (N, H, W) -> (1, N, H, W)
+
+        # Kornia expects keypoints as a single (B, N, 2) tensor, not a list.
+        if keypoints is not None and "keypoints" in self._data_keys:
+            keypoints = torch.stack(keypoints)  # type: ignore[assignment]  # (B, N, 2)
+
+        # Map data key names to actual data
+        data_map = {
+            "input": images,
+            "label": labels,
+            "bbox_xyxy": bboxes,
+            "mask": masks,
+            "keypoints": keypoints,
+        }
+
+        # Inputs follow the order of self._data_keys (Kornia matches data to keys by position).
+        # NOTE(review): keys with no data / unknown to data_map are skipped - confirm Kornia tolerates that.
+        inputs = []
+        provided_keys = []
+        for key in self._data_keys:
+            data = data_map.get(key)
+            if data is not None:
+                inputs.append(data)
+                provided_keys.append(key)
+
+        # Apply augmentation to all inputs
+        results = self.aug_sequential(*inputs)
+
+        # Kornia returns a plain tensor when only one data key is provided,
+        # but a list when multiple keys are used. Normalise to always be a list.
+        if not isinstance(results, (list, tuple)):
+            results = [results]
+
+        # Parse results back
+        output = {"images": None, "labels": labels, "bboxes": bboxes, "masks": masks, "keypoints": keypoints}
+        for i, key in enumerate(provided_keys):
+            if key == "input":
+                output["images"] = results[i]
+            elif key == "label":
+                output["labels"] = results[i]
+            elif key == "bbox_xyxy":
+                output["bboxes"] = results[i]
+            elif key == "mask":
+                # Drop the leading dim that was added before the transform
+                mask_results = results[i]
+                mask_results = [m.squeeze(0) for m in mask_results]  # (1, N, H, W) -> (N, H, W)
+                output["masks"] = mask_results
+            elif key == "keypoints":
+                # Kornia returns (B, N, 2) tensor; split back to list of per-sample tensors
+                kp_result = results[i]
+                if isinstance(kp_result, torch.Tensor) and kp_result.dim() == 3:
+                    output["keypoints"] = list(kp_result.unbind(0))
+                else:
+                    output["keypoints"] = kp_result
+
+        # Sanitize geometric annotations after Kornia transforms.
+        if output["images"] is not None:
+            s_bboxes, s_labels, s_masks, s_keypoints = self._sanitize_annotations(
+                output["images"],
+                output["bboxes"],
+                output["labels"],
+                output["masks"],
+                output["keypoints"],
+            )
+            output["bboxes"] = s_bboxes
+            output["labels"] = s_labels
+            output["masks"] = s_masks
+            output["keypoints"] = s_keypoints
+
+        return output
+
+    def _sanitize_annotations(
+        self,
+        images: torch.Tensor,
+        bboxes: list[torch.Tensor] | None,
+        labels: list[torch.Tensor] | None,
+        masks: list[torch.Tensor] | None,
+        keypoints: list[torch.Tensor] | None,
+        min_size: float = 4.0,
+        min_area: float = 16.0,
+    ) -> tuple[
+        list[torch.Tensor] | None,
+        list[torch.Tensor] | None,
+        list[torch.Tensor] | None,
+        list[torch.Tensor] | None,
+    ]:
+        """Sanitize transformed annotations.
+
+        - Clip bboxes to image bounds
+        - Remove invalid bboxes (non-finite, x2<=x1, y2<=y1, too small)
+        - Filter aligned labels/masks/keypoints using the same valid indices
+        """
+        if bboxes is None:
+            return bboxes, labels, masks, keypoints
+
+        batch_size, _, h, w = images.shape
+        if len(bboxes) != batch_size:
+            msg = f"GPU sanitize: bboxes batch mismatch, got {len(bboxes)} vs {batch_size}"
+            raise RuntimeError(msg)
+
+        if labels is not None and len(labels) != batch_size:
+            msg = f"GPU sanitize: labels batch mismatch, got {len(labels)} vs {batch_size}"
+            raise RuntimeError(msg)
+        if masks is not None and len(masks) != batch_size:
+            msg = f"GPU sanitize: masks batch mismatch, got {len(masks)} vs {batch_size}"
+            raise RuntimeError(msg)
+        if keypoints is not None and len(keypoints) != batch_size:
+            msg = f"GPU sanitize: keypoints batch mismatch, got {len(keypoints)} vs {batch_size}"
+            raise RuntimeError(msg)
+
+        out_bboxes: list[torch.Tensor] = []
+        out_labels: list[torch.Tensor] | None = [] if labels is not None else None
+        out_masks: list[torch.Tensor] | None = [] if masks is not None else None
+        out_keypoints: list[torch.Tensor] | None = [] if keypoints is not None else None
+
+        for i in range(batch_size):
+            boxes = bboxes[i]
+            if not (boxes.ndim == 2 and boxes.shape[-1] == 4):
+                msg = f"GPU sanitize: bboxes[{i}] must be [N,4], got {tuple(boxes.shape)}"
+                raise RuntimeError(msg)
+
+            if boxes.numel() == 0:
+                clipped = boxes
+                valid = torch.zeros((0,), dtype=torch.bool, device=boxes.device)
+            else:
+                clipped = boxes
+                clipped[:, 0::2].clamp_(0, w)
+                clipped[:, 1::2].clamp_(0, h)
+
+                x1, y1, x2, y2 = clipped[:, 0], clipped[:, 1], clipped[:, 2], clipped[:, 3]
+                widths = x2 - x1
+                heights = y2 - y1
+                areas = widths * heights
+                valid = (
+                    torch.isfinite(clipped).all(dim=1)
+                    & (widths > min_size)
+                    & (heights > min_size)
+                    & (areas >= min_area)
+                )
+
+            out_bboxes.append(clipped[valid])
+
+            if out_labels is not None and labels is not None:
+                lbl = labels[i]
+                if not (lbl.ndim >= 1 and lbl.shape[0] == valid.shape[0]):
+                    msg = f"GPU sanitize: labels[{i}] size mismatch with bboxes ({lbl.shape[0]} vs {valid.shape[0]})"
+                    raise RuntimeError(msg)
+                out_labels.append(lbl[valid])
+
+            if out_masks is not None and masks is not None:
+                m = masks[i]
+                # For instance masks, first dimension corresponds to object index.
+                if m.ndim >= 3 and m.shape[0] == valid.shape[0]:
+                    out_masks.append(m[valid])
+                elif m.ndim == 2 and valid.shape[0] == 1:
+                    out_masks.append(m.unsqueeze(0)[valid])
+                else:
+                    out_masks.append(m)
+
+            if out_keypoints is not None and keypoints is not None:
+                kp = keypoints[i]
+                if kp.numel() > 0:
+                    kp[..., 0].clamp_(0, w)
+                    kp[..., 1].clamp_(0, h)
+                if kp.ndim >= 2 and kp.shape[0] == valid.shape[0]:
+                    out_keypoints.append(kp[valid])
+                else:
+                    out_keypoints.append(kp)
+
+        return out_bboxes, out_labels, out_masks, out_keypoints
+
+    def __repr__(self) -> str:
+        """String representation of the pipeline."""
+        aug_str = str(self.aug_sequential) if self.aug_sequential is not None else "  (empty)"
+        info = f"  mean={self._mean}, std={self._std}" if self._mean or self._std else ""
+        return f"GPUAugmentationPipeline(\n{aug_str}\n  data_keys={self._data_keys}{info}\n)"
diff --git a/library/src/otx/data/augmentation/transforms.py b/library/src/otx/data/augmentation/transforms.py
new file mode 100644
index 00000000000..9d00ef08d2d
--- /dev/null
+++ b/library/src/otx/data/augmentation/transforms.py
@@ -0,0 +1,972 @@
+# Copyright (C) 2023-2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""Custom image transforms for OTX augmentation pipeline."""
+
+from __future__ import annotations
+
+import copy
+import typing
+from typing import Any, cast
+
+import torch
+import torchvision.transforms.v2 as tvt_v2
+from torchvision import tv_tensors
+from torchvision.transforms.v2 import functional as F  # noqa: N812
+
+from otx.data.augmentation.kernels import (
+    _resize_image_info,
+    _resized_crop_image_info,
+)
+from otx.data.entity.sample import OTXSample
+
+
+class Resize(tvt_v2.Transform):
+    """Resize transform based on torchvision.transforms.v2.
+
+    Extends torchvision's Resize with optional control over target resizing
+    and aspect ratio preservation with padding.
+
+    Args:
+        size (int or tuple): Target size (height, width). If int, a square (size, size) is used.
+        resize_targets (bool): If True, resize all targets (bboxes, masks, keypoints)
+            along with the image. If False, resize only the image and leave targets unchanged.
+            Defaults to True.
+        keep_aspect_ratio (bool): If True, preserve the aspect ratio of the original image
+            by resizing to fit within the target size and padding to reach exact target dimensions.
+            Defaults to False.
+        pad_value (int or tuple): Padding value for image. Defaults to 0.
+        interpolation: Interpolation mode for images. Defaults to InterpolationMode.BILINEAR.
+        antialias: Whether to apply antialiasing. Defaults to True.
+    """
+
+    def __init__(
+        self,
+        size: int | tuple[int, int],
+        resize_targets: bool = True,
+        keep_aspect_ratio: bool = False,
+        pad_value: int | tuple[int, int, int] = 0,
+        interpolation: F.InterpolationMode = F.InterpolationMode.BILINEAR,
+        antialias: bool = True,
+    ) -> None:
+        super().__init__()
+        self.size = (size, size) if isinstance(size, int) else tuple(size)  # an int means a square target
+        self.resize_targets = resize_targets  # when False, forward() leaves bboxes/masks/keypoints untouched
+        self.keep_aspect_ratio = keep_aspect_ratio  # when True, fit inside `size` and pad right/bottom
+        self.pad_value = pad_value  # image fill for the padded area (masks are always padded with 0)
+        self.interpolation = interpolation  # used for the image only; masks are resized with NEAREST
+        self.antialias = antialias
+
+    def _compute_resize_params(
+        self,
+        orig_h: int,
+        orig_w: int,
+    ) -> tuple[int, int, int, int, int, int]:
+        """Compute the resized (pre-padding) size and the padding needed to reach ``self.size``.
+
+        Args:
+            orig_h: Height of the input image.
+            orig_w: Width of the input image.
+
+        Returns:
+            new_h, new_w: Size after resize (before padding).
+            pad_left, pad_top, pad_right, pad_bottom: Padding amounts; left/top are always 0.
+        """
+        target_h, target_w = self.size
+
+        if not self.keep_aspect_ratio:
+            return target_h, target_w, 0, 0, 0, 0
+
+        # Largest uniform scale at which the image still fits inside the target.
+        scale = min(target_w / orig_w, target_h / orig_h)
+        # Extreme aspect ratios can round the short side down to 0, which cannot be
+        # resized to; keep at least one pixel (the rest of that axis becomes padding).
+        new_w = max(1, round(orig_w * scale))
+        new_h = max(1, round(orig_h * scale))
+
+        # Pad only on the right/bottom so the image keeps its origin at (0, 0);
+        # model post-processing assumes no left/top offset.
+        pad_right = target_w - new_w
+        pad_bottom = target_h - new_h
+        return new_h, new_w, 0, 0, pad_right, pad_bottom
+
+    def forward(self, *inputs: OTXSample) -> OTXSample:  # type: ignore[override]
+        """Resize the image (and optionally its targets) in place, padding if the aspect ratio is kept."""
+        if len(inputs) > 1:
+            msg = "Resize expects a single OTXSample input"
+            raise ValueError(msg)
+        sample: OTXSample = inputs[0]
+
+        if not hasattr(sample, "image"):
+            # Fallback: no ``image`` attribute, so treat the input as a plain tensor and resize it to ``self.size``
+            return cast(
+                "OTXSample",
+                F.resize(
+                    cast("torch.Tensor", sample),
+                    size=list(self.size),
+                    interpolation=self.interpolation,
+                    antialias=self.antialias,
+                ),
+            )
+
+        # Original dimensions are the last two axes of the image (height, width)
+        orig_h, orig_w = sample.image.shape[-2:]
+
+        # Compute resize and padding parameters
+        new_h, new_w, pad_left, pad_top, pad_right, pad_bottom = self._compute_resize_params(orig_h, orig_w)
+
+        # Resize image to the pre-padding size
+        sample.image = F.resize(
+            sample.image,
+            size=[new_h, new_w],
+            interpolation=self.interpolation,
+            antialias=self.antialias,
+        )
+        sample.image = sample.image.clamp(0, 1)  # NOTE(review): presumes float images in [0, 1] — confirm
+        # Apply padding if needed
+        if pad_left > 0 or pad_top > 0 or pad_right > 0 or pad_bottom > 0:
+            fill_value: float | int | list[float] = (
+                list(self.pad_value) if isinstance(self.pad_value, tuple) else self.pad_value
+            )
+            sample.image = F.pad(
+                sample.image,
+                padding=[pad_left, pad_top, pad_right, pad_bottom],
+                fill=fill_value,
+            )
+
+        # Per-axis scale factors, derived from the actual (rounded) resized size
+        scale_x = new_w / orig_w
+        scale_y = new_h / orig_h
+
+        # Resize/transform targets if requested
+        if self.resize_targets:
+            # Transform bounding boxes
+            bboxes = getattr(sample, "bboxes", None)
+            if bboxes is not None and len(bboxes) > 0:
+                # Clone tensor inputs so scaling does not write into the original boxes
+                if isinstance(bboxes, tv_tensors.BoundingBoxes):
+                    bboxes_data = bboxes.clone()
+                else:
+                    bboxes_data = bboxes.clone() if isinstance(bboxes, torch.Tensor) else torch.as_tensor(bboxes)
+
+                # Even columns are scaled by the x factor, odd columns by the y factor
+                bboxes_data[..., 0::2] = bboxes_data[..., 0::2] * scale_x  # x coordinates
+                bboxes_data[..., 1::2] = bboxes_data[..., 1::2] * scale_y  # y coordinates
+
+                # Apply padding offset (only left/top padding moves the coordinates)
+                if pad_left > 0 or pad_top > 0:
+                    bboxes_data[..., 0::2] = bboxes_data[..., 0::2] + pad_left  # x coordinates
+                    bboxes_data[..., 1::2] = bboxes_data[..., 1::2] + pad_top  # y coordinates
+
+                sample.bboxes = tv_tensors.BoundingBoxes(  # type: ignore[missing-attribute]
+                    bboxes_data,
+                    format=bboxes.format if isinstance(bboxes, tv_tensors.BoundingBoxes) else "XYXY",
+                    canvas_size=self.size,
+                )
+
+            # Transform masks
+            masks = getattr(sample, "masks", None)
+            if masks is not None and len(masks) > 0:
+                # Resize masks with NEAREST so no new (interpolated) values are introduced
+                resized_masks = F.resize(
+                    masks,
+                    size=[new_h, new_w],
+                    interpolation=F.InterpolationMode.NEAREST,
+                    antialias=False,
+                )
+                # Pad masks with 0 using the same padding as the image
+                if pad_left > 0 or pad_top > 0 or pad_right > 0 or pad_bottom > 0:
+                    resized_masks = F.pad(
+                        resized_masks,
+                        padding=[pad_left, pad_top, pad_right, pad_bottom],
+                        fill=0,
+                    )
+                sample.masks = (  # type: ignore[missing-attribute]
+                    tv_tensors.Mask(resized_masks) if isinstance(masks, tv_tensors.Mask) else resized_masks
+                )
+
+            # Transform keypoints/points
+            keypoints = getattr(sample, "keypoints", None)
+            if keypoints is not None and isinstance(keypoints, torch.Tensor):
+                keypoints = keypoints.clone()
+                # Scale keypoints (assuming format [..., x, y] or [..., x, y, visibility])
+                keypoints[..., 0] = keypoints[..., 0] * scale_x + pad_left
+                keypoints[..., 1] = keypoints[..., 1] * scale_y + pad_top
+                sample.keypoints = keypoints  # type: ignore[missing-attribute]
+
+        # Update img_info if available (done regardless of resize_targets)
+        if hasattr(sample, "img_info") and sample.img_info is not None:
+            # First update scale info based on resized (pre-pad) shape
+            sample.img_info = _resize_image_info(sample.img_info, (new_h, new_w))
+            # Then apply padding metadata if any
+            if pad_left > 0 or pad_top > 0 or pad_right > 0 or pad_bottom > 0:
+                sample.img_info.padding = (pad_left, pad_top, pad_right, pad_bottom)
+                sample.img_info.img_shape = (
+                    new_h + pad_top + pad_bottom,
+                    new_w + pad_left + pad_right,
+                )
+            if self.keep_aspect_ratio:
+                # Store padding info for potential inverse transforms
+                sample.img_info.pad_offset = (pad_left, pad_top, pad_right, pad_bottom)  # type: ignore[missing-attribute]
+                # ImageInfo.scale_factor uses (height, width)
+                sample.img_info.scale_factor = (scale_y, scale_x)
+                sample.img_info.keep_ratio = True
+
+        return sample
+
+
+class CachedMosaic(tvt_v2.Transform):
+    """Mosaic augmentation with caching using pure torchvision operations.
+
+    Combines four images into a single mosaic image by placing them in quadrants
+    around a randomly chosen center point. Uses caching to improve randomness
+    without requiring dataset access.
+
+    This implementation uses only torch/torchvision operations, no numpy/cv2.
+
+    Args:
+        img_scale (tuple[int, int]): Target image size (height, width) for each
+            image before creating mosaic. Defaults to (640, 640).
+        center_ratio_range (tuple[float, float]): Range for random center position
+            as ratio of img_scale. Defaults to (0.5, 1.5).
+        bbox_clip_border (bool): Whether to clip bboxes to image boundary.
+            Defaults to True.
+        pad_val (float | tuple[float, float, float]): Padding value for mosaic canvas.
+            Defaults to 114.0.
+        probability (float): Probability of applying mosaic. Defaults to 1.0.
+        max_cached_images (int): Maximum number of cached images. Defaults to 40.
+        random_pop (bool): If True, randomly remove cached images when full.
+            If False, use FIFO. Defaults to True.
+    """
+
+    def __init__(
+        self,
+        img_scale: tuple[int, int] | list[int] = (640, 640),  # (H, W)
+        center_ratio_range: tuple[float, float] = (0.5, 1.5),
+        bbox_clip_border: bool = True,
+        pad_val: float | tuple[float, float, float] = 114.0,
+        probability: float = 1.0,
+        max_cached_images: int = 40,
+        random_pop: bool = True,
+    ) -> None:
+        super().__init__()
+        # Reject invalid configuration at construction time rather than inside forward().
+        if not isinstance(img_scale, (tuple, list)):
+            msg = "img_scale must be a tuple or list"
+            raise TypeError(msg)
+        if not 0 <= probability <= 1.0:
+            msg = f"probability must be in [0, 1], got {probability}"
+            raise ValueError(msg)
+        if max_cached_images < 4:  # forward() only builds a mosaic once four samples are cached
+            msg = f"max_cached_images must be >= 4, got {max_cached_images}"
+            raise ValueError(msg)
+
+        self.img_scale = tuple(img_scale)  # (H, W)
+        self.center_ratio_range = center_ratio_range
+        self.bbox_clip_border = bbox_clip_border
+        self.pad_val = pad_val
+        self.prob = probability  # stored under a shorter name than the constructor argument
+        self.max_cached_images = max_cached_images
+        self.random_pop = random_pop  # True: evict a random cache entry; False: evict the oldest
+        # Deep copies of previously seen samples; forward() draws the other mosaic tiles from this list.
+        self.results_cache: list[OTXSample] = []
+
+    def _resize_keep_ratio(self, img: torch.Tensor, target_h: int, target_w: int) -> torch.Tensor:
+        """Shrink or enlarge an image uniformly so that it fits inside the target box.
+
+        Args:
+            img: Image tensor laid out as (C, H, W).
+            target_h: Maximum height of the result.
+            target_w: Maximum width of the result.
+
+        Returns:
+            The bilinearly resized (antialiased) image; its aspect ratio is unchanged up to rounding.
+        """
+        _channels, src_h, src_w = img.shape
+        # The tighter of the two axis ratios decides the common scale factor.
+        factor = min(target_h / src_h, target_w / src_w)
+        out_size = [round(src_h * factor), round(src_w * factor)]
+        return F.resize(img, size=out_size, interpolation=F.InterpolationMode.BILINEAR, antialias=True)
+
+    def _resize_masks_keep_ratio(
+        self,
+        masks: torch.Tensor,
+        target_h: int,
+        target_w: int,
+        orig_h: int,
+        orig_w: int,
+    ) -> torch.Tensor:
+        """Resize a stack of masks with the same uniform scale used for their image.
+
+        Args:
+            masks: Mask tensor laid out as (N, H, W).
+            target_h: Maximum height of the result.
+            target_w: Maximum width of the result.
+            orig_h: Height of the source image; the scale is derived from it, not from ``masks``.
+            orig_w: Width of the source image; the scale is derived from it, not from ``masks``.
+
+        Returns:
+            Masks resized with nearest-neighbour sampling to the rounded, scaled image size.
+        """
+        factor = min(target_h / orig_h, target_w / orig_w)
+        # Same rounding as the image resize so that masks and image end up the same size.
+        out_size = [round(orig_h * factor), round(orig_w * factor)]
+        return F.resize(masks, size=out_size, interpolation=F.InterpolationMode.NEAREST, antialias=False)
+
+    def _compute_mosaic_params(
+        self,
+        loc: str,
+        center_x: int,
+        center_y: int,
+        img_h: int,
+        img_w: int,
+    ) -> tuple[tuple[int, int, int, int], tuple[int, int, int, int], int, int]:
+        """Compute paste and crop coordinates for placing one tile next to the mosaic center.
+
+        Args:
+            loc: Position string ("top_left", "top_right", "bottom_left"); any other value is "bottom_right".
+            center_x: X coordinate of mosaic center.
+            center_y: Y coordinate of mosaic center.
+            img_h: Height of the image to place.
+            img_w: Width of the image to place.
+
+        Returns:
+            paste_coord: (x1, y1, x2, y2) coordinates in mosaic canvas.
+            crop_coord: (x1, y1, x2, y2) coordinates in source image.
+            pad_w: Horizontal offset (paste x1 minus crop x1) for bbox/mask adjustment.
+            pad_h: Vertical offset (paste y1 minus crop y1) for bbox/mask adjustment.
+        """
+        mosaic_h = self.img_scale[0] * 2
+        mosaic_w = self.img_scale[1] * 2
+
+        if loc == "top_left":  # tile's bottom-right corner sits on the mosaic center
+            x1_p = max(center_x - img_w, 0)
+            y1_p = max(center_y - img_h, 0)
+            x2_p = center_x
+            y2_p = center_y
+            x1_c = img_w - (x2_p - x1_p)
+            y1_c = img_h - (y2_p - y1_p)
+            x2_c = img_w
+            y2_c = img_h
+
+        elif loc == "top_right":  # tile's bottom-left corner sits on the mosaic center
+            x1_p = center_x
+            y1_p = max(center_y - img_h, 0)
+            x2_p = min(center_x + img_w, mosaic_w)
+            y2_p = center_y
+            x1_c = 0
+            y1_c = img_h - (y2_p - y1_p)
+            x2_c = min(img_w, x2_p - x1_p)
+            y2_c = img_h
+
+        elif loc == "bottom_left":  # tile's top-right corner sits on the mosaic center
+            x1_p = max(center_x - img_w, 0)
+            y1_p = center_y
+            x2_p = center_x
+            y2_p = min(center_y + img_h, mosaic_h)
+            x1_c = img_w - (x2_p - x1_p)
+            y1_c = 0
+            x2_c = img_w
+            y2_c = min(img_h, y2_p - y1_p)
+
+        else:  # bottom_right: tile's top-left corner sits on the mosaic center
+            x1_p = center_x
+            y1_p = center_y
+            x2_p = min(center_x + img_w, mosaic_w)
+            y2_p = min(center_y + img_h, mosaic_h)
+            x1_c = 0
+            y1_c = 0
+            x2_c = min(img_w, x2_p - x1_p)
+            y2_c = min(img_h, y2_p - y1_p)
+
+        paste_coord = (x1_p, y1_p, x2_p, y2_p)
+        crop_coord = (x1_c, y1_c, x2_c, y2_c)
+        pad_w = x1_p - x1_c  # shift that maps a source-image x to its canvas x
+        pad_h = y1_p - y1_c  # shift that maps a source-image y to its canvas y
+
+        return paste_coord, crop_coord, pad_w, pad_h
+
+    def _scale_bboxes(self, bboxes: torch.Tensor, scale: float) -> torch.Tensor:
+        """Return a new tensor with every box coordinate multiplied by ``scale``."""
+        return torch.mul(bboxes, scale)
+
+    def _translate_bboxes(self, bboxes: torch.Tensor, offset_x: int, offset_y: int) -> torch.Tensor:
+        """Shift XYXY boxes by (offset_x, offset_y); an empty input is handed back as-is."""
+        if bboxes.numel() > 0:
+            shift = bboxes.new_tensor((offset_x, offset_y) * 2)
+            return bboxes + shift
+        return bboxes
+
+    def _clip_bboxes(self, bboxes: torch.Tensor, h: int, w: int) -> torch.Tensor:
+        """Clamp boxes in place to the ``h`` x ``w`` image and return the same tensor."""
+        if bboxes.numel() > 0:
+            # x coordinates (even columns) are bounded by w, y coordinates (odd columns) by h
+            for first_col, upper in ((0, w), (1, h)):
+                bboxes[..., first_col::2] = bboxes[..., first_col::2].clamp(0, upper)
+        return bboxes
+
+    def _filter_valid_bboxes(self, bboxes: torch.Tensor, h: int, w: int) -> torch.Tensor:
+        """Build a boolean keep-mask over XYXY boxes (True = positive width and height)."""
+        if bboxes.numel() == 0:
+            return torch.zeros(0, dtype=torch.bool, device=bboxes.device)
+
+        left, top, right, bottom = (bboxes[:, col] for col in range(4))
+        keep = (right > left) & (bottom > top)
+        if self.bbox_clip_border:
+            return keep
+
+        # Boxes were not clipped, so additionally require that each one
+        # intersects the h x w image at least partially.
+        overlaps = (right > 0) & (bottom > 0) & (left < w) & (top < h)
+        return keep & overlaps
+
+    def _create_mosaic_canvas(self, dtype: torch.dtype, device: torch.device) -> torch.Tensor:
+        """Create the (3, 2H, 2W) mosaic canvas, filling each channel with its own ``pad_val`` entry."""
+        mosaic_h = self.img_scale[0] * 2
+        mosaic_w = self.img_scale[1] * 2
+        # A scalar pad_val applies to all channels; a tuple gives one value per channel.
+        fills = [self.pad_val] if isinstance(self.pad_val, (int, float)) else list(self.pad_val)
+        # Values above 1 are taken to be on the 0-255 scale and mapped into [0, 1].
+        fills = [v / 255.0 if v > 1.0 + 1e-5 else v for v in fills]
+        return torch.tensor(fills, dtype=dtype, device=device).view(-1, 1, 1).expand(3, mosaic_h, mosaic_w).contiguous()
+
+    def _create_mask_canvas(self, n_masks: int, device: torch.device) -> torch.Tensor:
+        """Allocate an all-zero uint8 mask stack covering the whole mosaic canvas."""
+        # The mosaic canvas is twice ``img_scale`` along both spatial axes.
+        canvas_shape = (n_masks, 2 * self.img_scale[0], 2 * self.img_scale[1])
+        return torch.zeros(canvas_shape, dtype=torch.uint8, device=device)
+
+    def get_indexes(self, cache: list) -> list:
+        """Draw three cache positions independently and uniformly (repeats are possible)."""
+        return [int(torch.randint(len(cache), (1,)).item()) for _draw in range(3)]
+
+    @typing.no_type_check
+    def forward(self, *_inputs: OTXSample) -> OTXSample:
+        """Apply CachedMosaic augmentation.
+
+        Args:
+            _inputs: Single OTXSample input.
+
+        Returns:
+            The same sample object; unchanged if the cache is too small or the mosaic is skipped.
+        """
+        assert len(_inputs) == 1, "Only single sample input is supported"  # noqa: S101
+        inputs = _inputs[0]
+
+        # Cache a deep copy so later in-place edits to `inputs` do not leak into the cache
+        self.results_cache.append(copy.deepcopy(inputs))
+        if len(self.results_cache) > self.max_cached_images:
+            index = int(torch.randint(0, len(self.results_cache), (1,)).item()) if self.random_pop else 0
+            self.results_cache.pop(index)
+
+        # Return early if cache too small
+        if len(self.results_cache) < 4:
+            return inputs
+
+        # Skip with probability
+        if torch.rand(1).item() > self.prob:
+            return inputs
+
+        # Draw 3 more samples from the cache (may include the copy of the current input)
+        indices = self.get_indexes(self.results_cache)
+        mix_results = [self.results_cache[i] for i in indices]
+
+        # Prepare mosaic
+        target_h, target_w = self.img_scale
+        mosaic_h, mosaic_w = target_h * 2, target_w * 2
+
+        # Random center position
+        center_x = int(torch.empty(1).uniform_(*self.center_ratio_range).item() * target_w)
+        center_y = int(torch.empty(1).uniform_(*self.center_ratio_range).item() * target_h)
+
+        # The input image fixes the dtype and device of the canvas
+        img_tensor = inputs.image
+        device = img_tensor.device
+
+        # Create mosaic canvas
+        mosaic_img = self._create_mosaic_canvas(img_tensor.dtype, device)
+
+        # Collect all bboxes, labels, masks
+        all_bboxes = []
+        all_labels = []
+        all_masks = []
+        with_mask = hasattr(inputs, "masks") and inputs.masks is not None
+
+        loc_strs = ("top_left", "top_right", "bottom_left", "bottom_right")
+        samples = [inputs, *mix_results]
+        # random tile order assignment.
+        order = torch.randperm(len(samples)).tolist()
+        samples = [samples[idx] for idx in order]
+
+        for i, loc in enumerate(loc_strs):
+            sample = samples[i]
+
+            # Tile image; expected to be a 3-dim tensor (unpacked as C, H, W)
+            img_i = sample.image
+            _, orig_h, orig_w = img_i.shape
+
+            # Resize keeping aspect ratio
+            scale = min(target_h / orig_h, target_w / orig_w)
+            img_i = self._resize_keep_ratio(img_i, target_h, target_w)
+            _, new_h, new_w = img_i.shape
+
+            # Compute paste/crop coordinates
+            paste_coord, crop_coord, pad_w, pad_h = self._compute_mosaic_params(
+                loc,
+                center_x,
+                center_y,
+                new_h,
+                new_w,
+            )
+            x1_p, y1_p, x2_p, y2_p = paste_coord
+            x1_c, y1_c, x2_c, y2_c = crop_coord
+
+            # Paste image region
+            mosaic_img[:, y1_p:y2_p, x1_p:x2_p] = img_i[:, y1_c:y2_c, x1_c:x2_c]
+
+            # Transform bboxes
+            bboxes_i = sample.bboxes.float()
+
+            bboxes_i = self._scale_bboxes(bboxes_i, scale)
+            bboxes_i = self._translate_bboxes(bboxes_i, pad_w, pad_h)
+            all_bboxes.append(bboxes_i)
+
+            # Collect labels
+            labels_i = sample.label
+            all_labels.append(labels_i)
+
+            # Transform masks if present
+            if with_mask:
+                masks_i = sample.masks
+                if masks_i is not None and len(masks_i) > 0:
+                    # Resize masks
+                    masks_i = self._resize_masks_keep_ratio(masks_i, target_h, target_w, orig_h, orig_w)
+
+                    # Create canvas for this sample's masks and paste
+                    n_masks = masks_i.shape[0]
+                    mask_canvas = self._create_mask_canvas(n_masks, device)
+                    mask_canvas[:, y1_p:y2_p, x1_p:x2_p] = masks_i[:, y1_c:y2_c, x1_c:x2_c]
+                    all_masks.append(mask_canvas)
+
+        # Concatenate all bboxes and labels
+        mosaic_bboxes = torch.cat(all_bboxes, dim=0)
+        mosaic_labels = torch.cat(all_labels, dim=0)
+
+        # Clip bboxes if needed
+        if self.bbox_clip_border:
+            mosaic_bboxes = self._clip_bboxes(mosaic_bboxes, mosaic_h, mosaic_w)
+
+        # Filter valid bboxes
+        valid_mask = self._filter_valid_bboxes(mosaic_bboxes, mosaic_h, mosaic_w)
+        mosaic_bboxes = mosaic_bboxes[valid_mask]
+        mosaic_labels = mosaic_labels[valid_mask]
+
+        # Update inputs
+        inputs.image = mosaic_img.clamp(0, 1)
+        inputs.img_info = _resized_crop_image_info(inputs.img_info, (mosaic_h, mosaic_w))
+        inputs.bboxes = tv_tensors.BoundingBoxes(
+            mosaic_bboxes,
+            format="XYXY",
+            canvas_size=(mosaic_h, mosaic_w),
+        )
+        inputs.label = mosaic_labels
+
+        # NOTE(review): indexing with valid_mask presumes one mask per bbox from every tile — confirm
+        if with_mask and len(all_masks) > 0:
+            mosaic_masks = torch.cat(all_masks, dim=0)
+            mosaic_masks = mosaic_masks[valid_mask]
+            inputs.masks = tv_tensors.Mask(mosaic_masks)
+
+        return inputs
+
+    def __repr__(self) -> str:
+        """Return ``ClassName(param=value, ...)`` listing the configured mosaic parameters."""
+        return (
+            f"{self.__class__.__name__}(img_scale={self.img_scale}, "
+            f"center_ratio_range={self.center_ratio_range}, "
+            f"pad_val={self.pad_val}, prob={self.prob}, "
+            f"max_cached_images={self.max_cached_images}, "
+            f"random_pop={self.random_pop})"  # close the parenthesis instead of ending on ", "
+        )
+
+
+class CachedMixUp(tvt_v2.Transform):
+    """Pure-torch MixUp augmentation for object detection and instance segmentation.
+
+    Mixes the current image with a cached image using alpha blending.
+    All operations use pure torch - no numpy or cv2.
+
+    Args:
+        img_scale (Sequence[int]): Target image size (H, W). Defaults to (640, 640).
+        ratio_range (Sequence[float]): Scale jitter ratio range. Defaults to (0.5, 1.5).
+        flip_ratio (float): Probability of horizontal flip. Defaults to 0.5.
+        pad_val (float): Padding value (0-255 or 0-1 for float). Defaults to 114.0.
+        max_iters (int): Max iterations to find non-empty cached sample. Defaults to 15.
+        bbox_clip_border (bool): Whether to clip bboxes to image border. Defaults to True.
+        max_cached_images (int): Maximum cache size. Defaults to 20.
+        random_pop (bool): Random vs FIFO cache eviction. Defaults to True.
+        probability (float): Probability of applying mixup. Defaults to 1.0.
+        mix_ratio (float): Blending ratio (0.5 = equal mix). Defaults to 0.5.
+    """
+
+    def __init__(
+        self,
+        img_scale: tuple[int, int] | list[int] = (640, 640),  # (H, W)
+        ratio_range: tuple[float, float] = (0.5, 1.5),
+        flip_ratio: float = 0.5,
+        pad_val: float = 114.0,
+        max_iters: int = 15,
+        bbox_clip_border: bool = True,
+        max_cached_images: int = 20,
+        random_pop: bool = True,
+        probability: float = 1.0,
+        mix_ratio: float = 0.5,
+    ) -> None:
+        super().__init__()
+        # Reject invalid configuration at construction time rather than inside forward().
+        if not isinstance(img_scale, (tuple, list)):
+            msg = "img_scale must be a tuple or list"
+            raise TypeError(msg)
+        if max_cached_images < 2:  # forward() returns early while the cache holds at most one sample
+            msg = f"Cache size must be >= 2, got {max_cached_images}"
+            raise ValueError(msg)
+        if not 0 <= probability <= 1.0:
+            msg = f"Probability must be in [0,1], got {probability}"
+            raise ValueError(msg)
+
+        self.img_scale = tuple(img_scale)  # (H, W)
+        self.ratio_range = ratio_range
+        self.flip_ratio = flip_ratio
+        self.pad_val = pad_val  # forward() divides values above 1.0 by 255
+        self.max_iters = max_iters  # number of random draws _get_cached_index() makes at most
+        self.bbox_clip_border = bbox_clip_border
+        self.max_cached_images = max_cached_images
+        self.random_pop = random_pop  # True: evict a random cache entry; False: evict the oldest
+        self.prob = probability  # stored under a shorter name than the constructor argument
+        self.mix_ratio = mix_ratio  # weight of the original image in the blend
+        # Deep copies of previously seen samples; forward() picks the image to blend in from this list.
+        self.results_cache: list[OTXSample] = []
+
+    def _resize_keep_ratio(self, img: torch.Tensor, target_size: tuple[int, int]) -> tuple[torch.Tensor, float]:
+        """Resize an image to fit inside ``target_size`` while preserving its aspect ratio.
+
+        Args:
+            img: CHW image tensor.
+            target_size: (H, W) box the result must fit in.
+
+        Returns:
+            The bilinearly resized image and the scale ratio that was applied.
+        """
+        _, h, w = img.shape
+        target_h, target_w = target_size
+        scale_ratio = min(target_h / h, target_w / w)
+
+        # int() truncates, so a very thin image could collapse to a zero-sized side,
+        # which F.resize cannot produce; keep every side at least one pixel long.
+        out_size = [max(1, int(h * scale_ratio)), max(1, int(w * scale_ratio))]
+        resized = F.resize(img, out_size, interpolation=F.InterpolationMode.BILINEAR, antialias=True)
+        return resized, scale_ratio
+
+    def _scale_bboxes(self, bboxes: torch.Tensor, scale: float) -> torch.Tensor:
+        """Multiply all box coordinates by ``scale``."""
+        # Nothing to scale: hand the empty input back as the very same object.
+        has_boxes = bboxes.numel() != 0
+        return bboxes * scale if has_boxes else bboxes
+
+    def _translate_bboxes(self, bboxes: torch.Tensor, offset_x: float, offset_y: float) -> torch.Tensor:
+        """Shift XYXY boxes in place by (offset_x, offset_y) and return the same tensor."""
+        if bboxes.numel() > 0:
+            # Columns 0 and 2 hold x coordinates, columns 1 and 3 hold y coordinates;
+            # the in-place adds go through views, so the caller's tensor is updated.
+            for column in range(4):
+                shift = offset_y if column % 2 else offset_x
+                bboxes[:, column] += shift
+        return bboxes
+
+    def _clip_bboxes(self, bboxes: torch.Tensor, img_h: int, img_w: int) -> torch.Tensor:
+        """Clamp XYXY boxes in place to ``[0, img_w]`` horizontally and ``[0, img_h]`` vertically."""
+        if bboxes.numel() > 0:
+            # x coordinates live in columns 0 and 2, y coordinates in columns 1 and 3.
+            limits = (img_w, img_h, img_w, img_h)
+            for column, upper in enumerate(limits):
+                # clamp_ on the column view writes straight into ``bboxes``.
+                bboxes[:, column].clamp_(0, upper)
+        return bboxes
+
+    def _flip_bboxes_horizontal(self, bboxes: torch.Tensor, img_w: int) -> torch.Tensor:
+        """Mirror XYXY boxes in place about the vertical centre line of an image ``img_w`` wide."""
+        if bboxes.numel() > 0:
+            # After mirroring, the old right edge becomes the new left edge and vice versa.
+            # Both values are computed before either column is overwritten.
+            mirrored_left, mirrored_right = img_w - bboxes[:, 2], img_w - bboxes[:, 0]
+            bboxes[:, 0] = mirrored_left
+            bboxes[:, 2] = mirrored_right
+        return bboxes
+
+    def _filter_valid_bboxes(
+        self,
+        bboxes: torch.Tensor,
+        labels: torch.Tensor,
+        img_h: int,
+        img_w: int,
+        min_area: float = 1.0,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """Keep XYXY boxes whose center lies inside the image and whose area is at least ``min_area``.
+
+        Returns:
+            Filtered bboxes, filtered labels, and the boolean keep-mask (on the same device as ``bboxes``).
+        """
+        if bboxes.numel() == 0:
+            # Build the empty mask on the boxes' device, matching the mask of the non-empty branch.
+            return bboxes, labels, torch.zeros(0, dtype=torch.bool, device=bboxes.device)
+
+        # Check if bbox center is inside image
+        cx = (bboxes[:, 0] + bboxes[:, 2]) / 2
+        cy = (bboxes[:, 1] + bboxes[:, 3]) / 2
+        inside = (cx >= 0) & (cx < img_w) & (cy >= 0) & (cy < img_h)
+
+        # Check valid area
+        w = bboxes[:, 2] - bboxes[:, 0]
+        h = bboxes[:, 3] - bboxes[:, 1]
+        valid_area = (w > 0) & (h > 0) & (w * h >= min_area)
+        valid = inside & valid_area
+        return bboxes[valid], labels[valid], valid
+
+    def _get_cached_index(self) -> int:
+        """Pick a random cache position, retrying up to ``max_iters`` times to find one with boxes."""
+        candidate = 0
+        for _attempt in range(self.max_iters):
+            candidate = int(torch.randint(len(self.results_cache), (1,)).item())
+            if len(getattr(self.results_cache[candidate], "bboxes", [])) > 0:
+                break
+        return candidate
+
+    @typing.no_type_check
+    def forward(self, *_inputs: OTXSample) -> OTXSample:
+        """Apply MixUp transform using pure torch operations."""
+        assert len(_inputs) == 1, "Multiple inputs not supported"  # noqa: S101
+        inputs = _inputs[0]
+
+        # Cache management
+        self.results_cache.append(copy.deepcopy(inputs))
+        if len(self.results_cache) > self.max_cached_images:
+            pop_idx = int(torch.randint(0, len(self.results_cache), (1,)).item()) if self.random_pop else 0
+            self.results_cache.pop(pop_idx)
+
+        # Early returns
+        if len(self.results_cache) <= 1:
+            return inputs
+
+        if torch.rand(1).item() > self.prob:
+            return inputs
+
+        # Get cached sample
+        cache_idx = self._get_cached_index()
+        cached = copy.deepcopy(self.results_cache[cache_idx])
+
+        if cached.bboxes.shape[0] == 0:
+            return inputs
+
+        ori_img = inputs.image
+        cached_img = cached.image
+
+        _, target_h, target_w = ori_img.shape
+        pad_val = self.pad_val / 255.0 if self.pad_val > 1.0 else self.pad_val
+
+        # Check for masks
+        with_mask = hasattr(inputs, "masks") and inputs.masks is not None
+
+        # Random parameters
+        jit_factor = float(torch.empty(1).uniform_(*self.ratio_range).item())
+        do_flip = torch.rand(1).item() < self.flip_ratio
+
+        # Step 1: Resize cached image keeping aspect ratio
+        cached_resized, scale_ratio = self._resize_keep_ratio(cached_img, self.img_scale)
+        c, resized_h, resized_w = cached_resized.shape
+
+        # Step 2: Paste onto padded canvas
+        canvas = torch.full((c, self.img_scale[0], self.img_scale[1]), pad_val, dtype=cached_resized.dtype)
+        paste_h = min(resized_h, self.img_scale[0])
+        paste_w = min(resized_w, self.img_scale[1])
+        canvas[:, :paste_h, :paste_w] = cached_resized[:, :paste_h, :paste_w]
+
+        # Step 3: Apply scale jitter
+        combined_scale = scale_ratio * jit_factor
+        jit_h = int(self.img_scale[0] * jit_factor)
+        jit_w = int(self.img_scale[1] * jit_factor)
+        canvas_jittered = F.resize(canvas, [jit_h, jit_w], interpolation=F.InterpolationMode.BILINEAR, antialias=True)
+
+        # Step 4: Horizontal flip
+        if do_flip:
+            canvas_jittered = canvas_jittered.flip(-1)
+
+        # Step 5: Pad and random crop
+        jit_h, jit_w = canvas_jittered.shape[-2:]
+        pad_h = max(jit_h, target_h)
+        pad_w = max(jit_w, target_w)
+
+        padded = torch.full((c, pad_h, pad_w), pad_val, dtype=canvas_jittered.dtype)
+        padded[:, :jit_h, :jit_w] = canvas_jittered
+
+        # Random crop offset
+        y_offset = int(torch.randint(0, max(1, pad_h - target_h + 1), (1,)).item()) if pad_h > target_h else 0
+        x_offset = int(torch.randint(0, max(1, pad_w - target_w + 1), (1,)).item()) if pad_w > target_w else 0
+
+        cropped = padded[:, y_offset : y_offset + target_h, x_offset : x_offset + target_w]
+
+        # Step 6: Transform bboxes
+        cached_bboxes = cached.bboxes.float()
+
+        # Scale bboxes
+        cached_bboxes = self._scale_bboxes(cached_bboxes, combined_scale)
+
+        # Clip before flip
+        if self.bbox_clip_border:
+            cached_bboxes = self._clip_bboxes(cached_bboxes, jit_h, jit_w)
+
+        # Flip bboxes
+        if do_flip:
+            cached_bboxes = self._flip_bboxes_horizontal(cached_bboxes, jit_w)
+
+        # Translate bboxes (account for crop offset)
+        cached_bboxes = self._translate_bboxes(cached_bboxes, -x_offset, -y_offset)
+
+        # Clip after translate
+        if self.bbox_clip_border:
+            cached_bboxes = self._clip_bboxes(cached_bboxes, target_h, target_w)
+
+        # Step 7: Mix images (alpha blending)
+        beta = self.mix_ratio
+        mixup_img = beta * ori_img + (1.0 - beta) * cropped
+
+        # Step 8: Combine bboxes and labels
+        ori_bboxes = inputs.bboxes.float()
+        ori_labels = inputs.label
+        cached_labels = cached.label
+        # Filter valid cached bboxes
+        cached_bboxes, cached_labels, valid_mask = self._filter_valid_bboxes(
+            cached_bboxes, cached_labels, target_h, target_w
+        )
+
+        # Concatenate
+        mixup_bboxes = torch.cat([ori_bboxes, cached_bboxes], dim=0)
+        mixup_labels = torch.cat([ori_labels, cached_labels], dim=0)
+
+        # Step 9: Handle masks for instance segmentation
+        if with_mask:
+            ori_masks = inputs.masks
+            cached_masks = cached.masks
+
+            # Transform cached masks - fully vectorized (one interpolate call for all instances)
+            if len(cached_masks) > 0:
+                n_m = cached_masks.shape[0]
+                mh, mw = cached_masks.shape[-2], cached_masks.shape[-1]
+                new_h = int(mh * combined_scale)
+                new_w = int(mw * combined_scale)
+                if new_h > 0 and new_w > 0:
+                    # Batch resize: (N, H, W) -> (N, 1, H, W) -> interpolate -> (N, new_h, new_w)
+                    resized_batch = (
+                        torch.nn.functional.interpolate(
+                            cached_masks.unsqueeze(1).float(),
+                            size=(new_h, new_w),
+                            mode="nearest",
+                        )
+                        .squeeze(1)
+                        .to(torch.uint8)
+                    )
+
+                    # Batch flip
+                    if do_flip:
+                        resized_batch = resized_batch.flip(-1)
+
+                    # Batch translate/crop - same src/dst region for all N masks
+                    out_batch = torch.zeros((n_m, target_h, target_w), dtype=torch.uint8, device=resized_batch.device)
+                    oy, ox = -y_offset, -x_offset
+                    rh, rw = resized_batch.shape[-2], resized_batch.shape[-1]
+                    src_y1 = max(0, oy)
+                    src_y2 = min(rh, oy + target_h)
+                    src_x1 = max(0, ox)
+                    src_x2 = min(rw, ox + target_w)
+                    dst_y1 = max(0, -oy)
+                    dst_y2 = dst_y1 + (src_y2 - src_y1)
+                    dst_x1 = max(0, -ox)
+                    dst_x2 = dst_x1 + (src_x2 - src_x1)
+                    if src_y2 > src_y1 and src_x2 > src_x1:
+                        out_batch[:, dst_y1:dst_y2, dst_x1:dst_x2] = resized_batch[:, src_y1:src_y2, src_x1:src_x2]
+
+                    # Filter by valid_mask and concatenate
+                    cached_masks_transformed = out_batch[valid_mask.cpu()]
+                    if len(cached_masks_transformed) > 0:
+                        mixup_masks = torch.cat([ori_masks, cached_masks_transformed], dim=0)
+                    else:
+                        mixup_masks = ori_masks
+                else:
+                    mixup_masks = ori_masks
+            else:
+                mixup_masks = ori_masks
+
+            inputs.masks = mixup_masks
+
+        # Update inputs
+        inputs.image = mixup_img.clamp(0, 1)
+        inputs.bboxes = tv_tensors.BoundingBoxes(mixup_bboxes, format="XYXY", canvas_size=(target_h, target_w))
+        inputs.label = mixup_labels
+        inputs.img_info = _resized_crop_image_info(inputs.img_info, (target_h, target_w))
+
+        return inputs
+
+    def __repr__(self) -> str:  # Debug/log string listing the transform's configuration attributes
+        return (
+            f"{self.__class__.__name__}("
+            f"img_scale={self.img_scale}, "
+            f"ratio_range={self.ratio_range}, "
+            f"flip_ratio={self.flip_ratio}, "
+            f"pad_val={self.pad_val}, "
+            f"max_iters={self.max_iters}, "
+            f"bbox_clip_border={self.bbox_clip_border}, "
+            f"max_cached_images={self.max_cached_images}, "
+            f"random_pop={self.random_pop}, "
+            f"prob={self.prob}, "
+            f"mix_ratio={self.mix_ratio})"
+        )
+
+
+class RandomIoUCrop(tvt_v2.RandomIoUCrop):
+    """Random IoU crop that is applied only with a configurable probability.
+
+    Args:
+        min_scale (float, optional): the same as RandomIoUCrop. Defaults to 0.3.
+        max_scale (float, optional): the same as RandomIoUCrop. Defaults to 1.
+        min_aspect_ratio (float, optional): the same as RandomIoUCrop. Defaults to 0.5.
+        max_aspect_ratio (float, optional): the same as RandomIoUCrop. Defaults to 2.
+        sampler_options (list[float] | None, optional): the same as RandomIoUCrop. Defaults to None.
+        trials (int, optional): the same as RandomIoUCrop. Defaults to 40.
+        probability (float, optional): probability of applying the crop. Defaults to 1.0.
+    """
+
+    def __init__(
+        self,
+        min_scale: float = 0.3,
+        max_scale: float = 1,
+        min_aspect_ratio: float = 0.5,
+        max_aspect_ratio: float = 2,
+        sampler_options: list[float] | None = None,
+        trials: int = 40,
+        probability: float = 1.0,
+    ):
+        super().__init__(
+            min_scale,
+            max_scale,
+            min_aspect_ratio,
+            max_aspect_ratio,
+            sampler_options,
+            trials,
+        )
+        self.p = probability
+
+    def __call__(self, *inputs: Any) -> Any:  # noqa: ANN401
+        """Apply the crop with probability ``self.p``; otherwise return the inputs unchanged."""
+        if torch.rand(1) >= self.p:
+            return inputs if len(inputs) > 1 else inputs[0]
+
+        return super().__call__(*inputs)
diff --git a/library/src/otx/data/dataset/base.py b/library/src/otx/data/dataset/base.py
index f4df6c57e9f..7a78020ef43 100644
--- a/library/src/otx/data/dataset/base.py
+++ b/library/src/otx/data/dataset/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2025 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Base class for OTXDataset using new Datumaro experimental Dataset."""
@@ -6,31 +6,32 @@
 from __future__ import annotations
 
 import abc
+from functools import partial
 from typing import TYPE_CHECKING, Callable, Iterable, List, Union
 
-import numpy as np
 import torch
 from torch.utils.data import Dataset as TorchDataset
+from torchvision.transforms.v2 import Compose
+from torchvision.transforms.v2 import functional as f
 
 from otx import LabelInfo, NullLabelInfo
-
-if TYPE_CHECKING:
-    from datumaro.experimental import Dataset
-
+from otx.data.augmentation.pipeline import CPUAugmentationPipeline
 from otx.data.entity.sample import OTXSample, OTXSampleBatch
-from otx.data.transform_libs.torchvision import Compose
 from otx.types import OTXTaskType
 
-Transforms = Union[Compose, Callable, List[Callable], dict[str, Compose | Callable | List[Callable]]]
+if TYPE_CHECKING:
+    from datumaro.experimental import Dataset
 
-RNG = np.random.default_rng(42)
+Transforms = Union[
+    Compose, Callable, List[Callable], dict[str, Compose | Callable | List[Callable]], "CPUAugmentationPipeline"
+]
 
 
 def _ensure_chw_format(img: torch.Tensor) -> torch.Tensor:
     """Ensure image tensor is in CHW format with 3 channels.
 
     Args:
-        img: Image tensor that may be in HWC or CHW format
+        img: Image tensor that may be in HWC, HCW, or CHW format
 
     Returns:
         Image tensor in CHW format (C, H, W) for 3D or (B, C, H, W) for 4D with 3 channels
@@ -45,6 +46,10 @@ def _ensure_chw_format(img: torch.Tensor) -> torch.Tensor:
         if img.shape[-1] in (1, 3, 4) and img.shape[0] > 4:
             # HWC format detected, convert to CHW
             img = img.permute(2, 0, 1)
+        # Check for HCW format: channels in the middle dimension
+        elif img.shape[1] in (1, 3, 4) and img.shape[0] > 4 and img.shape[2] > 4:
+            # HCW format detected, convert to CHW
+            img = img.permute(1, 0, 2)
         # If 4 channels (RGBA), convert to 3 channels (RGB)
         if img.shape[0] == 4:
             img = img[:3]
@@ -61,11 +66,13 @@ def _collect_optional_attr(items: list[OTXSample], attr_name: str) -> list | Non
     return values if any(value is not None for value in values) else None
 
 
-def _default_collate_fn(items: list[OTXSample]) -> OTXSampleBatch:
+def _default_collate_fn(items: list[OTXSample], stack_images: bool = True) -> OTXSampleBatch:
     """Collate OTXSample items into an OTXSampleBatch.
 
     Args:
         items: List of OTXSample items to batch
+        stack_images: Whether to stack images into a single tensor or keep as a list of tensors. Defaults to True.
+
     Returns:
         Batched OTXSample items with stacked tensors
     """
@@ -73,30 +80,34 @@ def _default_collate_fn(items: list[OTXSample]) -> OTXSampleBatch:
     image_tensors = []
     for item in items:
         img = item.image
-        if isinstance(img, torch.Tensor):
-            # Convert to float32 if not already
-            if img.dtype != torch.float32:
-                img = img.float()
-        else:
-            # Convert numpy array to float32 tensor
-            img = torch.from_numpy(img).float()
-        # Ensure image is in CHW format
-        img = _ensure_chw_format(img)
+        # All images should already be tensors from the pipeline
+        if not isinstance(img, torch.Tensor):
+            msg = (
+                f"Expected torch.Tensor but got {type(img)}. "
+                "Images should be converted to tensors in the dataset pipeline."
+            )
+            raise TypeError(msg)
+        # Convert to float32 if not already.
+        # For int32/int16 tensors (16-bit images) the intensity transform should
+        # have already produced float32 in [0,1].  If we get here with an integer
+        # dtype it means the intensity transform is missing — raise an error
+        # instead of silently producing wrong values.
+        if img.dtype != torch.float32:
+            if img.dtype in (torch.int32, torch.int16, torch.int64):
+                msg = (
+                    f"Image tensor has dtype {img.dtype} which looks like a high-bit-depth image "
+                    "that was not converted to float32. Please configure an intensity transform "
+                    "(IntensityConfig) in the recipe to map raw pixel values to [0, 1] float32."
+                )
+                raise TypeError(msg)
+            # uint8 → float32 [0, 1]; other floating dtypes are only cast, never rescaled by 255
+            img = img.float() if img.is_floating_point() else img.float().div_(255.0)
         image_tensors.append(img)
 
-    # Try to stack images if they have the same shape
-    if len(image_tensors) > 0 and all(t.shape == image_tensors[0].shape for t in image_tensors):
-        images = torch.stack(image_tensors)
-        # Safety: ensure stacked tensor is BCHW. If it's in BHWC or BHCW, fix it.
-        if images.ndim == 4:
-            # BHWC -> BCHW
-            if images.shape[1] not in (1, 3) and images.shape[-1] in (1, 3):
-                images = images.permute(0, 3, 1, 2)
-            # BHCW -> BCHW (channels at dim=2)
-            elif images.shape[2] in (1, 3) and images.shape[1] not in (1, 3):
-                images = images.permute(0, 2, 1, 3)
-    else:
-        images = image_tensors
+    if len(image_tensors) == 0:
+        msg = "No images found in batch. Ensure that the dataset and pipeline are configured correctly."
+        raise ValueError(msg)
+    images = torch.stack(image_tensors) if stack_images else image_tensors
 
     return OTXSampleBatch(
         images=images,
@@ -115,12 +126,11 @@ class OTXDataset(TorchDataset):
     functionality for data transformation, image decoding, and label handling.
 
     Args:
-        dm_subset (DmDataset): Datumaro subset of a dataset.
+        dm_subset (Dataset): Datumaro subset of a dataset.
         transforms (Transforms, optional): Transformations to apply to the data.
         max_refetch (int, optional): Maximum number of times to attempt fetching a valid image. Defaults to 1000.
         stack_images (bool, optional): Whether to stack images in the collate function in OTXBatchData entity.
             Defaults to True.
-        to_tv_image (bool, optional): Whether to convert images to TorchVision format. Defaults to True.
 
     """
 
@@ -130,13 +140,9 @@ def __init__(
         transforms: Transforms | None = None,
         max_refetch: int = 1000,
         stack_images: bool = True,
-        to_tv_image: bool = True,
-        sample_type: type[OTXSample] = OTXSample,
     ) -> None:
         self.transforms = transforms
         self.stack_images = stack_images
-        self.to_tv_image = to_tv_image
-        self.sample_type = sample_type
         self.max_refetch = max_refetch
         self.label_info: LabelInfo = NullLabelInfo()
         self.dm_subset = dm_subset
@@ -145,12 +151,33 @@ def __len__(self) -> int:
         return len(self.dm_subset)
 
     def _apply_transforms(self, entity: OTXSample) -> OTXSample | None:
+        # Intensity mapping: convert raw pixels to float32 [0, 1].
+        #
+        # When a CPUAugmentationPipeline is used the pipeline itself prepends
+        # the correct intensity transform (built from IntensityConfig), so we
+        # must NOT scale here — the intensity transform will do it.
+        #
+        # For legacy paths (Compose, callable) or when no transforms are set we
+        # keep the original uint8-only scaling as a safe default.
+        if not isinstance(self.transforms, CPUAugmentationPipeline):
+            # Legacy path: always scale assuming uint8 input (backward-compat)
+            entity.image = f.to_dtype(entity.image, dtype=torch.float32, scale=True)
+
         if self.transforms is None:
             return entity
+
+        if isinstance(self.transforms, CPUAugmentationPipeline):
+            return self.transforms(entity)
+
+        # Legacy path: Compose
         if isinstance(self.transforms, Compose):
             return self.transforms(entity)
+
+        # Legacy path: Iterable of transforms
         if isinstance(self.transforms, Iterable):
             return self._iterable_transforms(entity)
+
+        # Legacy path: Single callable
         if callable(self.transforms):
             return self.transforms(entity)
         return None
@@ -162,13 +189,21 @@ def _iterable_transforms(self, item: OTXSample) -> OTXSample | None:
         results = item
         for transform in self.transforms:
             results = transform(results)
-            # MMCV transform can produce None. Please see
-            # https://github.com/open-mmlab/mmengine/blob/26f22ed283ae4ac3a24b756809e5961efe6f9da8/mmengine/dataset/base_dataset.py#L59-L66
             if results is None:
                 return None
 
         return results
 
+    def _read_dm_item(self, index: int) -> OTXSample:
+        """Read a sample from the datumaro subset and normalize its image layout via ``_ensure_chw_format``."""
+        item = self.dm_subset[index]
+        # Workaround for a datumaro bug: ``TensorField.from_polars()`` applies
+        # ``np.transpose(data, (2, 0, 1))`` to undo the export transpose, but
+        # the correct inverse of ``(2, 0, 1)`` is ``(1, 2, 0)``.  As a result,
+        # images come back as HWC instead of the original CHW.
+        item.image = _ensure_chw_format(item.image)
+        return item
+
     def __getitem__(self, index: int) -> OTXSample:
         for _ in range(self.max_refetch):
             results = self._get_item_impl(index)
@@ -176,19 +211,18 @@ def __getitem__(self, index: int) -> OTXSample:
             if results is not None:
                 return results
 
-            index = RNG.integers(0, len(self))
-
+            index = torch.randint(0, len(self), (1,)).item()
         msg = f"Reach the maximum refetch number ({self.max_refetch})"
         raise RuntimeError(msg)
 
     def _get_item_impl(self, index: int) -> OTXSample | None:
-        dm_item = self.dm_subset[index]
+        dm_item = self._read_dm_item(index)
         return self._apply_transforms(dm_item)
 
     @property
     def collate_fn(self) -> Callable:
         """Collection function to collect samples into a batch in data loader."""
-        return _default_collate_fn
+        return partial(_default_collate_fn, stack_images=self.stack_images)
 
     @abc.abstractmethod
     def get_idx_list_per_classes(self, use_string_label: bool = False) -> dict[int | str, list[int]]:
diff --git a/library/src/otx/data/dataset/classification.py b/library/src/otx/data/dataset/classification.py
index a9cf653645c..a0c653fa7fa 100644
--- a/library/src/otx/data/dataset/classification.py
+++ b/library/src/otx/data/dataset/classification.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Module for OTXClassificationDatasets using new Datumaro experimental Dataset."""
@@ -9,7 +9,6 @@
 
 import torch
 from torch.nn import functional
-from torchvision.transforms.v2.functional import to_dtype, to_image
 
 from otx import HLabelInfo, LabelInfo
 from otx.data.dataset.base import OTXDataset, Transforms
@@ -18,6 +17,7 @@
     ClassificationMultiLabelSample,
     ClassificationSample,
 )
+from otx.data.entity.utils import with_image_dtype
 from otx.types import OTXTaskType
 
 if TYPE_CHECKING:
@@ -33,11 +33,11 @@ class OTXMulticlassClsDataset(OTXDataset):
     multi-class classification training and inference.
 
     Args:
-        dm_subset (DmDataset): Datumaro dataset subset containing the data items.
+        dm_subset (Dataset): Datumaro dataset subset containing the data items.
         transforms (Transforms, optional): Transformations to apply to the data.
         max_refetch (int): Maximum number of retries when fetching a data item fails.
         stack_images (bool): Whether to stack images in batch processing.
-        to_tv_image (bool): Whether to convert images to torchvision format.
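+        storage_dtype (str): Dtype passed to ``with_image_dtype`` for the sample type. Defaults to "uint8".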
 
     Raises:
         ValueError: If an image has multiple labels (multi-label case).
@@ -57,17 +57,15 @@ def __init__(
         transforms: Transforms | None = None,
         max_refetch: int = 1000,
         stack_images: bool = True,
-        to_tv_image: bool = True,
+        storage_dtype: str = "uint8",
     ) -> None:
-        sample_type = ClassificationSample
+        sample_type = with_image_dtype(ClassificationSample, storage_dtype)
         dm_subset = dm_subset.convert_to_schema(sample_type)
         super().__init__(
             dm_subset=dm_subset,
-            sample_type=sample_type,
             transforms=transforms,
             max_refetch=max_refetch,
             stack_images=stack_images,
-            to_tv_image=to_tv_image,
         )
 
         labels = list(dm_subset.schema.attributes["label"].categories.labels)  # type: ignore[missing-attribute]
@@ -117,7 +115,7 @@ class OTXMultilabelClsDataset(OTXDataset):
         transforms (Transforms, optional): Transform operations to apply to the data items.
         max_refetch (int): Maximum number of retries when fetching a data item fails.
         stack_images (bool): Whether to stack images in batch processing.
-        to_tv_image (bool): Whether to convert images to torchvision format.
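+        storage_dtype (str): Dtype passed to ``with_image_dtype`` for the sample type. Defaults to "uint8".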
 
     Attributes:
         num_classes (int): Number of classes in the dataset.
@@ -137,16 +135,15 @@ def __init__(
         transforms: Transforms | None = None,
         max_refetch: int = 1000,
         stack_images: bool = True,
-        to_tv_image: bool = True,
+        storage_dtype: str = "uint8",
     ) -> None:
-        sample_type = ClassificationMultiLabelSample
+        sample_type = with_image_dtype(ClassificationMultiLabelSample, storage_dtype)
         dm_subset = dm_subset.convert_to_schema(sample_type)
         super().__init__(
             dm_subset=dm_subset,
             transforms=transforms,
             max_refetch=max_refetch,
             stack_images=stack_images,
-            to_tv_image=to_tv_image,
         )
 
         labels = list(dm_subset.schema.attributes["label"].categories.labels)  # type: ignore[missing-attribute]
@@ -158,8 +155,7 @@ def __init__(
         self.num_classes = len(labels)
 
     def _get_item_impl(self, index: int) -> ClassificationMultiLabelSample | None:
-        item = self.dm_subset[index]
-        item.image = to_dtype(to_image(item.image), dtype=torch.float32)
+        item = self._read_dm_item(index)
         item.label = self._convert_to_onehot(torch.as_tensor(list(item.label)), ignored_labels=[])
         return self._apply_transforms(item)
 
@@ -225,7 +221,7 @@ class OTXHlabelClsDataset(OTXDataset):
         transforms (Transforms, optional): Transform operations to apply to the data items.
         max_refetch (int): Maximum number of retries when fetching a data item fails.
         stack_images (bool): Whether to stack images in batch processing.
-        to_tv_image (bool): Whether to convert images to torchvision format.
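+        storage_dtype (str): Dtype passed to ``with_image_dtype`` for the sample type. Defaults to "uint8".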
 
     Attributes:
         label_info (HLabelInfo): HLabelInfo containing hierarchical label structure information.
@@ -250,20 +246,18 @@ def __init__(
         transforms: Transforms | None = None,
         max_refetch: int = 1000,
         stack_images: bool = True,
-        to_tv_image: bool = True,
+        storage_dtype: str = "uint8",
     ) -> None:
-        sample_type = ClassificationHierarchicalSample
+        sample_type = with_image_dtype(ClassificationHierarchicalSample, storage_dtype)
         label_categories = dm_subset.schema.attributes["label"].categories
         # Pass target categories manually to ensure HierarchicalLabelCategories are not converted to regular Labels.
         target_categories = {"label": label_categories} if label_categories is not None else None
         dm_subset = dm_subset.convert_to_schema(sample_type, target_categories=target_categories)  # type: ignore[arg-type]
         super().__init__(
             dm_subset=dm_subset,
-            sample_type=sample_type,
             transforms=transforms,
             max_refetch=max_refetch,
             stack_images=stack_images,
-            to_tv_image=to_tv_image,
         )
         self.dm_categories: HierarchicalLabelCategories = dm_subset.schema.attributes["label"].categories  # type: ignore[assignment]
         self.label_info = HLabelInfo.from_dm_label_groups(self.dm_categories)
@@ -276,8 +270,7 @@ def __init__(
             raise ValueError(msg)
 
     def _get_item_impl(self, index: int) -> ClassificationHierarchicalSample | None:
-        item = self.dm_subset[index]
-        item.image = to_dtype(to_image(item.image), dtype=torch.float32)
+        item = self._read_dm_item(index)
         item.label = torch.as_tensor(self._convert_label_to_hlabel_format(list(item.label), []))
         return self._apply_transforms(item)
 
diff --git a/library/src/otx/data/dataset/detection.py b/library/src/otx/data/dataset/detection.py
index 3f223637a99..d8d519a469d 100644
--- a/library/src/otx/data/dataset/detection.py
+++ b/library/src/otx/data/dataset/detection.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Module for OTXDetectionDataset."""
@@ -11,6 +11,7 @@
 from otx.data.dataset.base import OTXDataset, Transforms
 from otx.data.dataset.mixins import DataAugSwitchMixin
 from otx.data.entity.sample import DetectionSample
+from otx.data.entity.utils import with_image_dtype
 from otx.types.label import LabelInfo
 
 if TYPE_CHECKING:
@@ -29,7 +30,7 @@ class OTXDetectionDataset(OTXDataset, DataAugSwitchMixin):
         transforms (Transforms | None, optional): Transform operations to apply to the data items.
         max_refetch (int): Maximum number of retries when fetching a data item fails.
         stack_images (bool): Whether to stack images in batch processing.
-        to_tv_image (bool): Whether to convert images to torchvision format.
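+        storage_dtype (str): Dtype passed to ``with_image_dtype`` for the sample type. Defaults to "uint8".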
 
     Example:
         >>> from otx.data.dataset.detection import OTXDetectionDataset
@@ -46,23 +47,20 @@ def __init__(
         transforms: Transforms | None = None,
         max_refetch: int = 1000,
         stack_images: bool = True,
-        to_tv_image: bool = True,
+        storage_dtype: str = "uint8",
     ) -> None:
-        sample_type = DetectionSample
+        sample_type = with_image_dtype(DetectionSample, storage_dtype)
         dm_subset = dm_subset.convert_to_schema(sample_type)
         super().__init__(
             dm_subset=dm_subset,
-            sample_type=sample_type,
             transforms=transforms,
             max_refetch=max_refetch,
             stack_images=stack_images,
-            to_tv_image=to_tv_image,
         )
-
-        labels = list(dm_subset.schema.attributes["label"].categories.labels)
+        labels = dm_subset.schema.attributes["label"].categories.labels
         self.label_info = LabelInfo(
-            label_names=labels,
-            label_groups=[labels],
+            label_names=list(labels),
+            label_groups=[list(labels)],
             label_ids=[str(i) for i in range(len(labels))],
         )
 
diff --git a/library/src/otx/data/dataset/instance_segmentation.py b/library/src/otx/data/dataset/instance_segmentation.py
index 74c7e658a1c..1b83f930f77 100644
--- a/library/src/otx/data/dataset/instance_segmentation.py
+++ b/library/src/otx/data/dataset/instance_segmentation.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Module for OTXInstanceSegDataset."""
@@ -10,6 +10,7 @@
 from otx import LabelInfo
 from otx.data.dataset.base import OTXDataset, Transforms
 from otx.data.entity.sample import InstanceSegmentationSample
+from otx.data.entity.utils import with_image_dtype
 from otx.types import OTXTaskType
 
 if TYPE_CHECKING:
@@ -27,7 +28,7 @@ class OTXInstanceSegDataset(OTXDataset):
         task_type (OTXTaskType, optional): The task type. Defaults to INSTANCE_SEGMENTATION.
         max_refetch (int, optional): Maximum number of times to refetch data. Defaults to 1000.
         stack_images (bool, optional): Whether to stack images. Defaults to True.
-        to_tv_image (bool, optional): Whether to convert images to torchvision format. Defaults to True.
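+        storage_dtype (str, optional): Dtype passed to ``with_image_dtype`` for the sample type. Defaults to "uint8".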
     """
 
     def __init__(
@@ -37,17 +38,15 @@ def __init__(
         task_type: OTXTaskType = OTXTaskType.INSTANCE_SEGMENTATION,
         max_refetch: int = 1000,
         stack_images: bool = True,
-        to_tv_image: bool = True,
+        storage_dtype: str = "uint8",
     ) -> None:
-        sample_type = InstanceSegmentationSample
+        sample_type = with_image_dtype(InstanceSegmentationSample, storage_dtype)
         dm_subset = dm_subset.convert_to_schema(sample_type)
         super().__init__(
             dm_subset=dm_subset,
-            sample_type=sample_type,
             transforms=transforms,
             max_refetch=max_refetch,
             stack_images=stack_images,
-            to_tv_image=to_tv_image,
         )
 
         labels = list(dm_subset.schema.attributes["label"].categories.labels)
diff --git a/library/src/otx/data/dataset/keypoint_detection.py b/library/src/otx/data/dataset/keypoint_detection.py
index dc9b8881b0d..30231db07f8 100644
--- a/library/src/otx/data/dataset/keypoint_detection.py
+++ b/library/src/otx/data/dataset/keypoint_detection.py
@@ -1,24 +1,20 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Module for OTXKeypointDetectionDataset."""
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Callable, List, Union
+from typing import TYPE_CHECKING
 
 import torch
-from torchvision.transforms.v2.functional import to_dtype, to_image
 
+from otx.data.dataset.base import OTXDataset, Transforms
 from otx.data.entity.sample import KeypointSample
-from otx.data.transform_libs.torchvision import Compose
+from otx.data.entity.utils import with_image_dtype
 from otx.types import OTXTaskType
 from otx.types.label import LabelInfo
 
-from .base import OTXDataset
-
-Transforms = Union[Compose, Callable, List[Callable], dict[str, Compose | Callable | List[Callable]]]
-
 if TYPE_CHECKING:
     from datumaro.experimental import Dataset
 
@@ -37,7 +33,7 @@ class OTXKeypointDetectionDataset(OTXDataset):
         max_refetch (int, optional): Maximum number of retries when fetching a data item fails.
         image_color_channel (ImageColorChannel, optional): Color channel format for images (RGB, BGR, etc.).
         stack_images (bool, optional): Whether to stack images in batch processing.
-        to_tv_image (bool, optional): Whether to convert images to torchvision format.
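+        storage_dtype (str, optional): Dtype passed to ``with_image_dtype`` for the sample type. Defaults to "uint8".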
 
     Example:
         >>> from otx.data.dataset.keypoint_detection import OTXKeypointDetectionDataset
@@ -54,17 +50,15 @@ def __init__(
         transforms: Transforms | None = None,
         max_refetch: int = 1000,
         stack_images: bool = True,
-        to_tv_image: bool = True,
+        storage_dtype: str = "uint8",
     ) -> None:
-        sample_type = KeypointSample
+        sample_type = with_image_dtype(KeypointSample, storage_dtype)
         dm_subset = dm_subset.convert_to_schema(sample_type)
         super().__init__(
             dm_subset=dm_subset,
-            sample_type=sample_type,
             transforms=transforms,
             max_refetch=max_refetch,
             stack_images=stack_images,
-            to_tv_image=to_tv_image,
         )
         labels = dm_subset.schema.attributes["keypoints"].categories.labels
         self.label_info = LabelInfo(
@@ -74,16 +68,8 @@ def __init__(
         )
 
     def _get_item_impl(self, index: int) -> KeypointSample | None:
-        item = self.dm_subset[index]
-        keypoints = item.keypoints
-        keypoints[:, 2] = torch.clamp(keypoints[:, 2], max=1)  # OTX represents visibility as 0 or 1
-        item.keypoints = keypoints
-        # Handle image conversion - to_image only permutes numpy arrays, not tensors
-        image = item.image
-        if isinstance(image, torch.Tensor) and image.ndim == 3 and image.shape[-1] in (1, 3):
-            # Image is in HWC format, convert to CHW
-            image = image.permute(2, 0, 1)
-        item.image = to_dtype(to_image(image), torch.float32)
+        item = self._read_dm_item(index)
+        item.keypoints[:, 2] = torch.clamp(item.keypoints[:, 2], max=1)  # OTX represents visibility as 0 or 1
         return self._apply_transforms(item)  # type: ignore[return-value]
 
     @property
diff --git a/library/src/otx/data/dataset/mixins.py b/library/src/otx/data/dataset/mixins.py
index fbe0a0f3b9f..3ded85f2178 100644
--- a/library/src/otx/data/dataset/mixins.py
+++ b/library/src/otx/data/dataset/mixins.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Mixins for OTX datasets."""
@@ -12,25 +12,24 @@
 
 
 class DataAugSwitchMixin:
-    """Mixin class that provides dynamic augmentation switching functionality.
+    """Mixin that provides dynamic augmentation switching for the CPU/GPU pipeline.
 
-    This mixin can be used by any dataset that needs to switch between different
-    augmentation policies during training based on epoch information.
+    At each ``__getitem__`` call the dataset checks the current policy from
+    ``DataAugSwitch`` and swaps ``self.transforms`` to the corresponding
+    ``CPUAugmentationPipeline``. GPU augmentations are swapped separately by
+    ``AugmentationSchedulerCallback`` at epoch boundaries.
+
+    Usage::
 
-    Usage:
         class MyDataset(OTXDataset, DataAugSwitchMixin):
-            def _get_item_impl(self, index: int) -> OTXSample | None:
-                # ... get your data ...
-                self._apply_augmentation_switch()
-                return self._apply_transforms(entity)
+            def _apply_transforms(self, entity):
+                if self.has_dynamic_augmentation:
+                    self._apply_augmentation_switch()
+                return super()._apply_transforms(entity)
     """
 
     def _ensure_data_aug_switch_initialized(self) -> None:
-        """Ensure data_aug_switch attribute is initialized.
-
-        This method is called lazily since __init__ may not be called
-        due to multiple inheritance MRO in some dataset classes.
-        """
+        """Ensure data_aug_switch attribute is initialized."""
         if not hasattr(self, "data_aug_switch"):
             self.data_aug_switch: DataAugSwitch | None = None
 
@@ -38,27 +37,25 @@ def set_data_aug_switch(self, data_aug_switch: DataAugSwitch) -> None:
         """Set data augmentation switch.
 
         Args:
-            data_aug_switch: DataAugSwitch instance that manages dynamic augmentation policies
+            data_aug_switch: DataAugSwitch instance that manages dynamic augmentation policies.
         """
         self._ensure_data_aug_switch_initialized()
         self.data_aug_switch = data_aug_switch
 
     def _apply_augmentation_switch(self) -> str | None:
-        """Update the dataset's transform configuration based on the current augmentation policy.
+        """Swap ``self.transforms`` to the active policy's CPU pipeline.
 
-        This method should be called before applying the regular transforms.
-        It updates the dataset's transform configuration based on the current
-        augmentation policy from DataAugSwitch, if available.
+        Called before ``_apply_transforms`` in each ``__getitem__``.
+        Only swaps the CPU pipeline; GPU pipeline is handled by the callback.
 
         Returns:
-            str | None: The name of the current policy, or None if no policy is set.
+            Policy name, or None if no switch is configured.
         """
         self._ensure_data_aug_switch_initialized()
         if self.data_aug_switch is None:
             return None
         policy_name = self.data_aug_switch.current_policy_name
-        policy = self.data_aug_switch.policies[policy_name]
-        self.to_tv_image, self.transforms = policy["to_tv_image"], policy["transforms"]
+        self.transforms = self.data_aug_switch.get_cpu_pipeline(policy_name)
         return policy_name
 
     @property
diff --git a/library/src/otx/data/dataset/segmentation.py b/library/src/otx/data/dataset/segmentation.py
index 04362f581ce..3b72814fe15 100644
--- a/library/src/otx/data/dataset/segmentation.py
+++ b/library/src/otx/data/dataset/segmentation.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2025 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Module for OTXSegmentationDataset."""
@@ -10,6 +10,7 @@
 from otx import SegLabelInfo
 from otx.data.dataset.base import OTXDataset, Transforms
 from otx.data.entity.sample import SegmentationSample
+from otx.data.entity.utils import with_image_dtype
 from otx.types import OTXTaskType
 
 if TYPE_CHECKING:
@@ -29,7 +30,7 @@ class OTXSegmentationDataset(OTXDataset):
         max_refetch: Maximum number of retries when fetching a data item fails.
         image_color_channel: Color channel format for images (RGB, BGR, etc.).
         stack_images: Whether to stack images in batch processing.
-        to_tv_image: Whether to convert images to torchvision format.
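+        storage_dtype: Image dtype name passed to ``with_image_dtype`` to build the sample type. Defaults to "uint8".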
         ignore_index: Index value for pixels to be ignored during training.
 
     Attributes:
@@ -50,17 +51,16 @@ def __init__(
         dm_subset: Dataset,
         transforms: Transforms | None = None,
         max_refetch: int = 1000,
-        to_tv_image: bool = True,
         ignore_index: int = 255,
+        data_format: str = "",
+        storage_dtype: str = "uint8",
     ) -> None:
-        sample_type = SegmentationSample
+        sample_type = with_image_dtype(SegmentationSample, storage_dtype)
         dm_subset = dm_subset.convert_to_schema(sample_type)
         super().__init__(
             dm_subset=dm_subset,
             transforms=transforms,
             max_refetch=max_refetch,
-            to_tv_image=to_tv_image,
-            sample_type=sample_type,
         )
 
         labels = list(dm_subset.schema.attributes["masks"].categories.labels)
diff --git a/library/src/otx/data/dataset/tile.py b/library/src/otx/data/dataset/tile.py
index d05c3f1117d..3c3ec69a8bd 100644
--- a/library/src/otx/data/dataset/tile.py
+++ b/library/src/otx/data/dataset/tile.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """OTX tile dataset."""
@@ -23,7 +23,7 @@
 )
 from otx.types.task import OTXTaskType
 
-from .base import OTXDataset
+from .base import OTXDataset, _ensure_chw_format
 
 if TYPE_CHECKING:
     from otx.config.data import TileConfig
@@ -59,10 +59,10 @@ def create(
         Returns:
             OTXTileDataset: Tile dataset.
         """
-        subset = dataset.dm_subset[0].subset
+        subset = dataset.dm_subset[0].subset  # type: ignore[attr-defined]
         if subset == Subset.TRAINING:
             dm_dataset = dataset.dm_subset
-            dm_dataset = dm_dataset.transform(
+            dm_dataset = dm_dataset.transform(  # type: ignore[attr-defined]
                 create_tiling_transform(
                     TilingConfig(
                         tile_height=tile_config.tile_size[0],
@@ -72,9 +72,9 @@ def create(
                     ),
                     threshold_drop_ann=0.5,
                 ),
-                dtype=dm_dataset.dtype,
+                dtype=dm_dataset.dtype,  # type: ignore[attr-defined]
             )
-            dm_dataset = dm_dataset.transform(create_filtering_transform(), dtype=dm_dataset.dtype)
+            dm_dataset = dm_dataset.transform(create_filtering_transform(), dtype=dm_dataset.dtype)  # type: ignore[attr-defined]
             dataset.dm_subset = dm_dataset
             return dataset
 
@@ -103,7 +103,6 @@ def __init__(self, dataset: OTXDataset, tile_config: TileConfig, subset: Subset)
             dataset.transforms,
             dataset.max_refetch,
             dataset.stack_images,
-            dataset.to_tv_image,
         )
         self.tile_config = tile_config
         self._dataset = dataset
@@ -145,8 +144,8 @@ def get_tiles(
             A tuple containing two lists:
             - tile_entities (list[OTXSample]): List of tile entities.
         """
-        parent_slice_ds = self.dm_subset.slice(parent_idx, 1)
-        tile_ds = parent_slice_ds.transform(
+        parent_slice_ds = self.dm_subset.slice(parent_idx, 1)  # type: ignore[attr-defined]
+        tile_ds = parent_slice_ds.transform(  # type: ignore[attr-defined]
             create_tiling_transform(
                 TilingConfig(
                     tile_height=self.tile_config.tile_size[0],
@@ -156,12 +155,12 @@ def get_tiles(
                 ),
                 threshold_drop_ann=0.5,
             ),
-            dtype=parent_slice_ds.dtype,
+            dtype=parent_slice_ds.dtype,  # type: ignore[attr-defined]
         )
 
         if self._subset == Subset.VALIDATION:
             # NOTE: filter validation tiles with annotations only to avoid evaluation on empty tiles.
-            tile_ds = tile_ds.transform(create_filtering_transform(), dtype=parent_slice_ds.dtype)
+            tile_ds = tile_ds.transform(create_filtering_transform(), dtype=parent_slice_ds.dtype)  # type: ignore[attr-defined]
 
             # if tile dataset is empty it means objects are too big to fit in any tile, in this case include full image
             if len(tile_ds) == 0:
@@ -169,6 +168,8 @@ def get_tiles(
 
         tile_entities: list[OTXSample] = []
         for tile in tile_ds:
+            # Fix datumaro HWC→CHW format issue for tile images
+            tile.image = _ensure_chw_format(tile.image)
             # apply the same transforms as the original dataset
             object.__setattr__(tile.tile, "source_sample_idx", parent_idx)
             transformed_tile = self._apply_transforms(tile)
@@ -220,9 +221,9 @@ def _get_item_impl(self, index: int) -> TileDetDataEntity:  # type: ignore[overr
         return TileDetDataEntity(
             num_tiles=len(tile_entities),
             entity_list=tile_entities,
-            ori_img_info=item.img_info,
-            ori_bboxes=item.bboxes,
-            ori_labels=item.label,
+            ori_img_info=item.img_info,  # type: ignore[attr-defined]
+            ori_bboxes=item.bboxes,  # type: ignore[attr-defined]
+            ori_labels=item.label,  # type: ignore[attr-defined]
         )
 
 
@@ -268,10 +269,10 @@ def _get_item_impl(self, index: int) -> TileInstSegDataEntity:  # type: ignore[o
         return TileInstSegDataEntity(
             num_tiles=len(tile_entities),
             entity_list=tile_entities,
-            ori_img_info=item.img_info,
-            ori_bboxes=item.bboxes,
-            ori_labels=item.label,
-            ori_masks=item.masks,
+            ori_img_info=item.img_info,  # type: ignore[attr-defined]
+            ori_bboxes=item.bboxes,  # type: ignore[attr-defined]
+            ori_labels=item.label,  # type: ignore[attr-defined]
+            ori_masks=item.masks,  # type: ignore[attr-defined]
         )
 
 
@@ -312,6 +313,6 @@ def _get_item_impl(self, index: int) -> TileSegDataEntity:  # type: ignore[overr
         return TileSegDataEntity(
             num_tiles=len(tile_entities),
             entity_list=tile_entities,
-            ori_img_info=item.img_info,
-            ori_masks=item.masks,
+            ori_img_info=item.img_info,  # type: ignore[attr-defined]
+            ori_masks=item.masks,  # type: ignore[attr-defined]
         )
diff --git a/library/src/otx/data/entity/__init__.py b/library/src/otx/data/entity/__init__.py
index d6ab2cd2b52..23bc2910d30 100644
--- a/library/src/otx/data/entity/__init__.py
+++ b/library/src/otx/data/entity/__init__.py
@@ -1,15 +1,14 @@
-# Copyright (C) 2023-2025 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Module for OTX data entities."""
 
-from .base import ImageInfo, ImageType, Points
+from .base import ImageInfo, ImageType
 from .sample import (
     OTXPrediction,
     OTXPredictionBatch,
     OTXSample,
     OTXSampleBatch,
-    collate_fn,
 )
 from .tile import (
     TileBatchDetDataEntity,
@@ -26,12 +25,9 @@
     "OTXPredictionBatch",
     "OTXSample",
     "OTXSampleBatch",
-    "Points",
     "TileBatchDetDataEntity",
     "TileBatchInstSegDataEntity",
-    "TileBatchInstSegDataEntity",
     "TileBatchSegDataEntity",
     "TileDetDataEntity",
     "TileSegDataEntity",
-    "collate_fn",
 ]
diff --git a/library/src/otx/data/entity/base.py b/library/src/otx/data/entity/base.py
index 9b495ac6b6b..42ea97e2b78 100644
--- a/library/src/otx/data/entity/base.py
+++ b/library/src/otx/data/entity/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2025 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Module for OTX base data entities."""
@@ -7,12 +7,9 @@
 
 from typing import TYPE_CHECKING, Any, Dict
 
-import torch
-import torchvision.transforms.v2.functional as F  # noqa: N812
 from torch import Tensor
 from torch.utils._pytree import tree_flatten
 from torchvision import tv_tensors
-from torchvision.utils import _log_api_usage_once
 
 from otx.types.image import ImageType
 
@@ -20,39 +17,6 @@
     from collections.abc import Mapping
 
 
-def custom_wrap(wrappee: Tensor, *, like: tv_tensors.TVTensor, **kwargs) -> tv_tensors.TVTensor:
-    """Add `Points` in tv_tensors.wrap.
-
-    If `like` is
-        - tv_tensors.BoundingBoxes : the `format` and `canvas_size` of `like` are assigned to `wrappee`
-        - Points : the `canvas_size` of `like` is assigned to `wrappee`
-    Unless, they are passed as `kwargs`.
-
-    Args:
-        wrappee (Tensor): The tensor to convert.
-        like (tv_tensors.TVTensor): The reference. `wrappee` will be converted into the same subclass as `like`.
-        kwargs: Can contain "format" and "canvas_size" if `like` is a tv_tensor.BoundingBoxes,
-            or "canvas_size" if `like` is a `Points`. Ignored otherwise.
-    """
-    if isinstance(like, tv_tensors.BoundingBoxes):
-        return tv_tensors.BoundingBoxes._wrap(  # noqa: SLF001
-            wrappee,
-            format=kwargs.get("format", like.format),
-            canvas_size=kwargs.get("canvas_size", like.canvas_size),
-        )
-    elif isinstance(like, Points):  # noqa: RET505
-        return Points._wrap(wrappee, canvas_size=kwargs.get("canvas_size", like.canvas_size))  # noqa: SLF001
-
-    # # TODO(Vlad): remove this after torch upgrade. This workaround prevents a failure when like is also a Tensor
-    # if type(like) == type(wrappee):
-    #     return wrappee
-
-    return wrappee.as_subclass(type(like))
-
-
-tv_tensors.wrap = custom_wrap
-
-
 class ImageInfo(tv_tensors.TVTensor):
     """Meta info for image.
 
@@ -66,6 +30,9 @@ class ImageInfo(tv_tensors.TVTensor):
             it will lose the scaling information and be `None`.
         ignored_labels: Label that should be ignored in this image. Default to None.
         keep_ratio: If true, the image is resized while keeping the aspect ratio. Default to False.
+        bit_depth: Bits per channel of the *original* image before intensity mapping
+            (8 for uint8, 16 for uint16, etc.).  Set automatically by the intensity
+            transform in the CPU pipeline.  Default 8.
     """
 
     img_idx: int
@@ -75,6 +42,7 @@ class ImageInfo(tv_tensors.TVTensor):
     scale_factor: tuple[float, float] | None = (1.0, 1.0)
     ignored_labels: list[int]
     keep_ratio: bool = False
+    bit_depth: int = 8  # bits per channel of the original image (8 for uint8, 16 for uint16, etc.)
 
     @classmethod
     def _wrap(
@@ -88,6 +56,7 @@ def _wrap(
         scale_factor: tuple[float, float] | None = (1.0, 1.0),
         ignored_labels: list[int] | None = None,
         keep_ratio: bool = False,
+        bit_depth: int = 8,
     ) -> ImageInfo:
         image_info = dummy_tensor.as_subclass(cls)
         image_info.img_idx = img_idx
@@ -97,6 +66,7 @@ def _wrap(
         image_info.scale_factor = scale_factor
         image_info.ignored_labels = ignored_labels if ignored_labels else []
         image_info.keep_ratio = keep_ratio
+        image_info.bit_depth = bit_depth
         return image_info
 
     def __new__(  # noqa: D102
@@ -108,6 +78,7 @@ def __new__(  # noqa: D102
         scale_factor: tuple[float, float] | None = (1.0, 1.0),
         ignored_labels: list[int] | None = None,
         keep_ratio: bool = False,
+        bit_depth: int = 8,
     ) -> ImageInfo:
         return cls._wrap(
             dummy_tensor=Tensor(),
@@ -118,6 +89,7 @@ def __new__(  # noqa: D102
             scale_factor=scale_factor,
             ignored_labels=ignored_labels,
             keep_ratio=keep_ratio,
+            bit_depth=bit_depth,
         )
 
     @classmethod
@@ -148,6 +120,7 @@ def _wrap_output(
                 scale_factor=image_info.scale_factor,
                 ignored_labels=image_info.ignored_labels,
                 keep_ratio=image_info.keep_ratio,
+                bit_depth=image_info.bit_depth,
             )
         elif isinstance(output, (tuple, list)):
             image_infos = [x for x in flat_params if isinstance(x, ImageInfo)]
@@ -161,6 +134,7 @@ def _wrap_output(
                     scale_factor=image_info.scale_factor,
                     ignored_labels=image_info.ignored_labels,
                     keep_ratio=image_info.keep_ratio,
+                    bit_depth=image_info.bit_depth,
                 )
                 for dummy_tensor, image_info in zip(output, image_infos)
             )
@@ -175,252 +149,8 @@ def __repr__(self) -> str:
             f"padding={self.padding}, "
             f"scale_factor={self.scale_factor}, "
             f"ignored_labels={self.ignored_labels}, "
-            f"keep_ratio={self.keep_ratio})"
-        )
-
-
-@F.register_kernel(functional=F.resize, tv_tensor_cls=ImageInfo)
-def _resize_image_info(image_info: ImageInfo, size: list[int], **kwargs) -> ImageInfo:  # noqa: ARG001
-    """Register ImageInfo to TorchVision v2 resize kernel."""
-    if len(size) == 2:
-        image_info.img_shape = (size[0], size[1])
-    elif len(size) == 1:
-        image_info.img_shape = (size[0], size[0])
-    else:
-        raise ValueError(size)
-
-    ori_h, ori_w = image_info.ori_shape
-    new_h, new_w = image_info.img_shape
-    image_info.scale_factor = (new_h / ori_h, new_w / ori_w)
-    return image_info
-
-
-@F.register_kernel(functional=F.crop, tv_tensor_cls=ImageInfo)
-def _crop_image_info(
-    image_info: ImageInfo,
-    height: int,
-    width: int,
-    **kwargs,  # noqa: ARG001
-) -> ImageInfo:
-    """Register ImageInfo to TorchVision v2 resize kernel."""
-    image_info.img_shape = (height, width)
-    image_info.scale_factor = None
-    return image_info
-
-
-@F.register_kernel(functional=F.resized_crop, tv_tensor_cls=ImageInfo)
-def _resized_crop_image_info(
-    image_info: ImageInfo,
-    size: list[int],
-    **kwargs,  # noqa: ARG001
-) -> ImageInfo:
-    """Register ImageInfo to TorchVision v2 resize kernel."""
-    if len(size) == 2:
-        image_info.img_shape = (size[0], size[1])
-    elif len(size) == 1:
-        image_info.img_shape = (size[0], size[0])
-    else:
-        raise ValueError(size)
-
-    image_info.scale_factor = None
-    return image_info
-
-
-@F.register_kernel(functional=F.center_crop, tv_tensor_cls=ImageInfo)
-def _center_crop_image_info(
-    image_info: ImageInfo,
-    output_size: list[int],
-    **kwargs,  # noqa: ARG001
-) -> ImageInfo:
-    """Register ImageInfo to TorchVision v2 resize kernel."""
-    img_shape = F._geometry._center_crop_parse_output_size(output_size)  # noqa: SLF001
-    image_info.img_shape = (img_shape[0], img_shape[1])
-
-    image_info.scale_factor = None
-    return image_info
-
-
-@F.register_kernel(functional=F.pad, tv_tensor_cls=ImageInfo)
-def _pad_image_info(
-    image_info: ImageInfo,
-    padding: int | list[int],
-    **kwargs,  # noqa: ARG001
-) -> ImageInfo:
-    """Register ImageInfo to TorchVision v2 resize kernel."""
-    left, right, top, bottom = F._geometry._parse_pad_padding(padding)  # noqa: SLF001
-    height, width = image_info.img_shape
-    image_info.padding = (left, top, right, bottom)
-    image_info.img_shape = (height + top + bottom, width + left + right)
-    return image_info
-
-
-class Points(tv_tensors.TVTensor):
-    """`torch.Tensor` subclass for points.
-
-    Attributes:
-        data: Any data that can be turned into a tensor with `torch.as_tensor`.
-        canvas_size (two-tuple of ints): Height and width of the corresponding image or video.
-        dtype (torch.dtype, optional): Desired data type of the point. If omitted, will be inferred from `data`.
-        device (torch.device, optional): Desired device of the point. If omitted and `data` is a
-            `torch.Tensor`, the device is taken from it. Otherwise, the point is constructed on the CPU.
-        requires_grad (bool, optional): Whether autograd should record operations on the point. If omitted and
-            `data` is a `torch.Tensor`, the value is taken from it. Otherwise, defaults to `False`.
-    """
-
-    canvas_size: tuple[int, int]
-
-    @classmethod
-    def _wrap(cls, tensor: Tensor, *, canvas_size: tuple[int, int]) -> Points:
-        points = tensor.as_subclass(cls)
-        points.canvas_size = canvas_size
-        return points
-
-    def __new__(  # noqa: D102
-        cls,
-        data: Any,  # noqa: ANN401
-        *,
-        canvas_size: tuple[int, int],
-        dtype: torch.dtype | None = None,
-        device: torch.device | str | int | None = None,
-        requires_grad: bool | None = None,
-    ) -> Points:
-        tensor = cls._to_tensor(data, dtype=dtype, device=device, requires_grad=requires_grad)
-        return cls._wrap(tensor, canvas_size=canvas_size)
-
-    @classmethod
-    def _wrap_output(
-        cls,
-        output: Tensor,
-        args: tuple[()] = (),
-        kwargs: Mapping[str, Any] | None = None,
-    ) -> Points:
-        flat_params, _ = tree_flatten(args + (tuple(kwargs.values()) if kwargs else ()))
-        first_point_from_args = next(x for x in flat_params if isinstance(x, Points))
-        canvas_size = first_point_from_args.canvas_size
-
-        if isinstance(output, Tensor) and not isinstance(output, Points):
-            output = Points._wrap(output, canvas_size=canvas_size)
-        elif isinstance(output, (tuple, list)):
-            output = type(output)(Points._wrap(part, canvas_size=canvas_size) for part in output)
-        return output
-
-    def __repr__(self, *, tensor_contents: Any = None) -> str:  # noqa: ANN401
-        return self._make_repr(canvas_size=self.canvas_size)
-
-
-def resize_points(
-    points: torch.Tensor,
-    canvas_size: tuple[int, int],
-    size: tuple[int, int] | list[int],
-    max_size: int | None = None,
-) -> tuple[torch.Tensor, tuple[int, int]]:
-    """Resize points."""
-    old_height, old_width = canvas_size
-    new_height, new_width = F._geometry._compute_resized_output_size(  # noqa: SLF001
-        canvas_size,
-        size=size,
-        max_size=max_size,
-    )
-
-    if (new_height, new_width) == (old_height, old_width):
-        return points, canvas_size
-
-    w_ratio = new_width / old_width
-    h_ratio = new_height / old_height
-    ratios = torch.tensor([w_ratio, h_ratio], device=points.device)
-    return (
-        points.mul(ratios).to(points.dtype),
-        (new_height, new_width),
-    )
-
-
-@F.register_kernel(functional=F.resize, tv_tensor_cls=Points)
-def _resize_points_dispatch(
-    inpt: Points,
-    size: tuple[int, int] | list[int],
-    max_size: int | None = None,
-    **kwargs,  # noqa: ARG001
-) -> Points:
-    output, canvas_size = resize_points(
-        inpt.as_subclass(torch.Tensor),
-        inpt.canvas_size,
-        size,
-        max_size=max_size,
-    )
-    return tv_tensors.wrap(output, like=inpt, canvas_size=canvas_size)
-
-
-def pad_points(
-    points: torch.Tensor,
-    canvas_size: tuple[int, int],
-    padding: tuple[int, ...] | list[int],
-    padding_mode: str = "constant",
-) -> tuple[torch.Tensor, tuple[int, int]]:
-    """Pad points."""
-    if padding_mode not in ["constant"]:
-        # TODO(sungchul): add support of other padding modes
-        raise ValueError(f"Padding mode '{padding_mode}' is not supported with bounding boxes")  # noqa: EM102, TRY003
-
-    left, right, top, bottom = F._geometry._parse_pad_padding(padding)  # noqa: SLF001
-
-    pad = [left, top]
-    points = points + torch.tensor(pad, dtype=points.dtype, device=points.device)
-
-    height, width = canvas_size
-    height += top + bottom
-    width += left + right
-    canvas_size = (height, width)
-
-    return clamp_points(points, canvas_size=canvas_size), canvas_size
-
-
-@F.register_kernel(functional=F.pad, tv_tensor_cls=Points)
-def _pad_points_dispatch(
-    inpt: Points,
-    padding: tuple[int, ...] | list[int],
-    padding_mode: str = "constant",
-    **kwargs,  # noqa: ARG001
-) -> Points:
-    output, canvas_size = pad_points(
-        inpt.as_subclass(torch.Tensor),
-        canvas_size=inpt.canvas_size,
-        padding=padding,
-        padding_mode=padding_mode,
-    )
-    return tv_tensors.wrap(output, like=inpt, canvas_size=canvas_size)
-
-
-@F.register_kernel(functional=F.get_size, tv_tensor_cls=Points)
-def get_size_points(point: Points) -> list[int]:
-    """Get size of points."""
-    return list(point.canvas_size)
-
-
-def _clamp_points(points: Tensor, canvas_size: tuple[int, int]) -> Tensor:
-    in_dtype = points.dtype
-    points = points.clone() if points.is_floating_point() else points.float()
-    points[..., 0].clamp_(min=0, max=canvas_size[1])
-    points[..., 1].clamp_(min=0, max=canvas_size[0])
-    return points.to(in_dtype)
-
-
-def clamp_points(inpt: Tensor, canvas_size: tuple[int, int] | None = None) -> Tensor:
-    """Clamp point range."""
-    if not torch.jit.is_scripting():
-        _log_api_usage_once(clamp_points)
-
-    if torch.jit.is_scripting() or F._utils.is_pure_tensor(inpt):  # noqa: SLF001
-        if canvas_size is None:
-            raise ValueError("For pure tensor inputs, `canvas_size` has to be passed.")  # noqa: EM101, TRY003
-        return _clamp_points(inpt, canvas_size=canvas_size)
-    elif isinstance(inpt, Points):  # noqa: RET505
-        if canvas_size is not None:
-            raise ValueError("For point tv_tensor inputs, `canvas_size` must not be passed.")  # noqa: EM101, TRY003
-        output = _clamp_points(inpt.as_subclass(Tensor), canvas_size=inpt.canvas_size)
-        return tv_tensors.wrap(output, like=inpt)
-    else:
-        raise TypeError(  # noqa: TRY003
-            f"Input can either be a plain tensor or a point tv_tensor, but got {type(inpt)} instead.",  # noqa: EM102
+            f"keep_ratio={self.keep_ratio}, "
+            f"bit_depth={self.bit_depth})"
         )
 
 
diff --git a/library/src/otx/data/entity/sample.py b/library/src/otx/data/entity/sample.py
index 6d9d5fa1dd4..5a7a8b1f84a 100644
--- a/library/src/otx/data/entity/sample.py
+++ b/library/src/otx/data/entity/sample.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Sample classes for OTX data entities."""
@@ -6,13 +6,12 @@
 from __future__ import annotations
 
 from dataclasses import asdict, dataclass
-from typing import TYPE_CHECKING, Any, Sequence
+from typing import TYPE_CHECKING, Any, Sequence, cast
 
-import numpy as np
 import polars as pl
 import torch
-import torch.utils._pytree as pytree
-from datumaro.experimental import Sample, register_sample
+from datumaro.experimental import register_sample
+from datumaro.experimental.dataset import Sample
 from datumaro.experimental.fields import ImageInfo as DmImageInfo
 from datumaro.experimental.fields import (
     Subset,
@@ -28,6 +27,9 @@
 from torchvision import tv_tensors
 
 from otx.data.entity.base import ImageInfo
+from otx.data.entity.utils import (
+    register_pytree_node,
+)
 from otx.data.entity.validation import (
     validate_bboxes,
     validate_feature_vectors,
@@ -43,53 +45,11 @@
     from torchvision.tv_tensors import BoundingBoxes, Mask
 
 
-def register_pytree_node(cls: type[Sample]) -> type[Sample]:
-    """Decorator to register an OTX data entity with PyTorch's PyTree.
-
-    This decorator should be applied to every OTX data entity, as TorchVision V2 transforms
-    use the PyTree to flatten and unflatten the data entity during runtime.
-
-    Example:
-        `MulticlassClsDataEntity` example ::
-
-            @register_pytree_node
-            @dataclass
-            class MulticlassClsDataEntity(OTXDataEntity):
-                ...
-    """
-
-    def flatten_fn(obj: object) -> tuple[list[Any], list[str]]:
-        obj_dict = dict(obj.__dict__)
-
-        missing_keys = set(obj.__class__.__annotations__.keys()) - set(obj_dict.keys())
-        for key in missing_keys:
-            obj_dict[key] = getattr(obj, key)
-
-        return (list(obj_dict.values()), list(obj_dict.keys()))
-
-    def unflatten_fn(values: list[Any], context: list[str]) -> object:
-        kwargs = dict(zip(context, values))
-        # Extract _img_info to set after construction (since __post_init__ would overwrite it)
-        img_info = kwargs.pop("_img_info", None)
-        obj = cls(**kwargs)
-        # Restore _img_info if it was present (preserves transformed img_info)
-        if img_info is not None:
-            obj._img_info = img_info
-        return obj
-
-    pytree.register_pytree_node(
-        cls,
-        flatten_fn=flatten_fn,
-        unflatten_fn=unflatten_fn,
-    )
-    return cls
-
-
 @register_pytree_node
 class OTXSample(Sample):
     """Base class for OTX data samples."""
 
-    image: np.ndarray | torch.Tensor | tv_tensors.Image | Any
+    image: torch.Tensor | tv_tensors.Image
     subset: Subset = subset_field()
 
     @property
@@ -112,7 +72,7 @@ class ClassificationSample(OTXSample):
 
     subset: Subset = subset_field()
 
-    image: tv_tensors.Image | np.ndarray | torch.Tensor = image_field(dtype=pl.UInt8(), channels_first=True)
+    image: tv_tensors.Image | torch.Tensor = image_field(dtype=pl.UInt8(), channels_first=True)
     label: torch.Tensor = label_field(pl.UInt8())
     dm_image_info: DmImageInfo = image_info_field()
 
@@ -131,8 +91,8 @@ def __post_init__(self) -> None:
 class ClassificationMultiLabelSample(OTXSample):
     """ClassificationMultiLabelSample is a base class for OTX multi label classification items."""
 
-    image: tv_tensors.Image | np.ndarray | torch.Tensor = image_field(dtype=pl.UInt8(), channels_first=True)
-    label: np.ndarray | torch.Tensor = label_field(pl.UInt8(), multi_label=True)
+    image: tv_tensors.Image | torch.Tensor = image_field(dtype=pl.UInt8(), channels_first=True)
+    label: torch.Tensor = label_field(pl.UInt8(), multi_label=True)
     dm_image_info: DmImageInfo = image_info_field()
 
     def __post_init__(self) -> None:
@@ -150,8 +110,8 @@ def __post_init__(self) -> None:
 class ClassificationHierarchicalSample(OTXSample):
     """ClassificationHierarchicalSample is a base class for OTX hierarchical classification items."""
 
-    image: tv_tensors.Image | np.ndarray | torch.Tensor = image_field(dtype=pl.UInt8(), channels_first=True)
-    label: np.ndarray | torch.Tensor = label_field(pl.UInt8(), is_list=True)
+    image: tv_tensors.Image | torch.Tensor = image_field(dtype=pl.UInt8(), channels_first=True)
+    label: torch.Tensor = label_field(pl.UInt8(), is_list=True)
     dm_image_info: DmImageInfo = image_info_field()
 
     def __post_init__(self) -> None:
@@ -169,18 +129,19 @@ def __post_init__(self) -> None:
 class DetectionSample(OTXSample):
     """DetectionSample is a base class for OTX detection items."""
 
-    image: tv_tensors.Image | np.ndarray | torch.Tensor = image_field(
-        dtype=pl.UInt8(), format="RGB", channels_first=True
-    )
+    image: tv_tensors.Image | torch.Tensor = image_field(dtype=pl.UInt8(), format="RGB", channels_first=True)
     label: torch.Tensor = label_field(pl.UInt8(), is_list=True)
-    bboxes: np.ndarray | tv_tensors.BoundingBoxes = bbox_field(dtype=pl.Float32())
+    # Use Union type to allow torch.Tensor from Polars (since tv_tensors.BoundingBoxes
+    # conversion is not supported in Datumaro), then convert in __post_init__
+    bboxes: tv_tensors.BoundingBoxes | torch.Tensor = bbox_field(dtype=pl.Float32())
     dm_image_info: DmImageInfo = image_info_field()
 
     def __post_init__(self) -> None:
-        # Convert bboxes to tv_tensors format
         shape = (self.dm_image_info.height, self.dm_image_info.width)
-        if isinstance(self.bboxes, np.ndarray):
-            self.bboxes = tv_tensors.BoundingBoxes(  # pyrefly: ignore[no-matching-overload]
+        # Ensure bboxes are tv_tensors.BoundingBoxes
+        if not isinstance(self.bboxes, tv_tensors.BoundingBoxes):
+            # If it's a plain tensor, wrap it
+            self.bboxes = tv_tensors.BoundingBoxes(
                 self.bboxes,
                 format=tv_tensors.BoundingBoxFormat.XYXY,
                 canvas_size=shape,
@@ -200,7 +161,7 @@ class SegmentationSample(OTXSample):
     """OTXSample for segmentation tasks."""
 
     subset: Subset = subset_field()
-    image: np.ndarray | tv_tensors.Image | torch.Tensor = image_field(dtype=pl.UInt8(), channels_first=False)
+    image: tv_tensors.Image | torch.Tensor = image_field(dtype=pl.UInt8(), channels_first=False)
     masks: tv_tensors.Mask = mask_field(dtype=pl.UInt8(), channels_first=True, has_channels_dim=True)
     dm_image_info: DmImageInfo = image_info_field()
 
@@ -219,17 +180,21 @@ class InstanceSegmentationSample(OTXSample):
     """OTXSample for instance segmentation tasks."""
 
     subset: Subset = subset_field()
-    image: tv_tensors.Image | np.ndarray | torch.Tensor = image_field(dtype=pl.UInt8(), channels_first=True)
-    bboxes: np.ndarray | tv_tensors.BoundingBoxes = bbox_field(dtype=pl.Float32())
+    image: tv_tensors.Image | torch.Tensor = image_field(dtype=pl.UInt8(), channels_first=True)
+    # Use Union type to allow torch.Tensor from Polars (since tv_tensors.BoundingBoxes
+    # conversion is not supported in Datumaro), then convert in __post_init__
+    bboxes: tv_tensors.BoundingBoxes | torch.Tensor = bbox_field(dtype=pl.Float32())
     masks: tv_tensors.Mask = instance_mask_field(dtype=pl.UInt8())
-    label: torch.Tensor = label_field(is_list=True)
+    label: torch.Tensor = label_field(dtype=pl.UInt8(), is_list=True)
     dm_image_info: DmImageInfo = image_info_field()
 
     def __post_init__(self) -> None:
         shape = (self.dm_image_info.height, self.dm_image_info.width)
-        # Convert bboxes to tv_tensors format
-        if isinstance(self.bboxes, np.ndarray):
-            self.bboxes = tv_tensors.BoundingBoxes(  # pyrefly: ignore[no-matching-overload]
+
+        # Ensure bboxes are tv_tensors.BoundingBoxes
+        if not isinstance(self.bboxes, tv_tensors.BoundingBoxes):
+            # If it's a plain tensor, wrap it
+            self.bboxes = tv_tensors.BoundingBoxes(
                 self.bboxes,
                 format=tv_tensors.BoundingBoxFormat.XYXY,
                 canvas_size=shape,
@@ -249,8 +214,8 @@ class KeypointSample(OTXSample):
     """KeypointSample is a base class for OTX keypoint detection items."""
 
     subset: Subset = subset_field()
-    image: tv_tensors.Image | np.ndarray | torch.Tensor = image_field(dtype=pl.UInt8(), channels_first=True)
-    label: torch.Tensor = label_field(pl.UInt8(), is_list=True)
+    image: tv_tensors.Image | torch.Tensor = image_field(dtype=pl.UInt8(), channels_first=True)
+    label: torch.Tensor = label_field(dtype=pl.UInt8(), is_list=True)
     keypoints: torch.Tensor = keypoints_field()
     dm_image_info: DmImageInfo = image_info_field()
 
@@ -264,38 +229,12 @@ def __post_init__(self) -> None:
         )
 
 
-def collate_fn(samples: list[OTXSample]) -> OTXSampleBatch:
-    """Collate OTXSamples into a batch.
-
-    Args:
-        samples: List of OTXSamples to batch
-
-    Returns:
-        Batched OTXSampleBatch with stacked tensors
-    """
-    # Check if all images have the same size. TODO(kprokofi): remove this check once OV IR models are moved.
-    if all(sample.image.shape == samples[0].image.shape for sample in samples):
-        images = torch.stack([sample.image for sample in samples])
-    else:
-        # we need this only in case of OV inference, where no resize
-        images = [sample.image for sample in samples]
-
-    return OTXSampleBatch(
-        images=images,
-        labels=[sample.label for sample in samples],
-        bboxes=[sample.bboxes for sample in samples],
-        keypoints=[sample.keypoints for sample in samples],
-        masks=[sample.masks for sample in samples],
-        imgs_info=[sample.img_info for sample in samples],
-    )
-
-
 @dataclass
 class OTXSampleBatch:
     """OTX sample batch implementation.
 
     Attributes:
-        images: The batch of images as a tensor or list of tensors.
+        images: The batch of images as a BCHW tensor, or a list of CHW tensors.
         labels: List of label tensors, optional.
         masks: List of masks, optional.
         bboxes: List of bounding boxes, optional.
@@ -303,7 +242,7 @@ class OTXSampleBatch:
         imgs_info: Sequence of image information, optional.
     """
 
-    images: torch.Tensor | list[torch.Tensor]
+    images: torch.Tensor | tv_tensors.Image | list[torch.Tensor] | list[tv_tensors.Image]
     labels: list[torch.Tensor] | None = None
     masks: list[Mask] | None = None
     bboxes: list[BoundingBoxes] | None = None
@@ -313,9 +252,9 @@ class OTXSampleBatch:
     @property
     def batch_size(self) -> int:
         """Get the number of samples in the batch."""
-        if isinstance(self.images, torch.Tensor):
-            return self.images.shape[0]
-        return len(self.images)
+        if isinstance(self.images, list):
+            return len(self.images)
+        return self.images.shape[0]
 
     def __post_init__(self) -> None:
         """Validate the batch after initialization."""
@@ -323,7 +262,7 @@ def __post_init__(self) -> None:
 
     def _validate(self) -> None:
         """Validate the batch fields."""
-        validate_images(self.images)
+        validate_images(cast("torch.Tensor | list[torch.Tensor]", self.images))
         if self.labels is not None:
             validate_labels(self.labels)
         if self.bboxes is not None:
@@ -353,10 +292,7 @@ def maybe_wrap_tv(x: Any) -> Any:  # noqa: ANN401
 
         # Handle images separately because of tv_tensors wrapping
         if self.images is not None:
-            if isinstance(self.images, list):
-                kwargs["images"] = [maybe_wrap_tv(img) for img in self.images]
-            else:
-                kwargs["images"] = maybe_wrap_tv(self.images)
+            kwargs["images"] = maybe_wrap_tv(self.images)
 
         # Generic handler for all other fields
         for field in ["labels", "bboxes", "keypoints", "masks"]:
@@ -424,7 +360,7 @@ class OTXPrediction:
         saliency_map: The saliency map for XAI, optional.
     """
 
-    image: torch.Tensor | np.ndarray
+    image: torch.Tensor | tv_tensors.Image
     img_info: ImageInfo | None = None
     label: torch.Tensor | None = None
     masks: Mask | None = None
@@ -433,3 +369,21 @@ class OTXPrediction:
     scores: torch.Tensor | None = None
     feature_vector: torch.Tensor | None = None
     saliency_map: torch.Tensor | None = None
+
+
+def __getattr__(name: str) -> type:
+    """PEP 562 hook: recreate dynamic sample-dtype classes for pickle.
+
+    ``with_image_dtype()`` creates subclasses like ``ClassificationSample_uint16``
+    at runtime.  When a DataLoader worker (spawned process) unpickles the
+    dataset, Python looks up these classes by name on this module.  In a fresh
+    process they don't exist yet, so this hook recreates them on demand.
+    """
+    for suffix in ("_uint16", "_int16", "_float32"):
+        if name.endswith(suffix):
+            base_cls = globals().get(name[: -len(suffix)])
+            if base_cls is not None and isinstance(base_cls, type):
+                from otx.data.entity.utils import with_image_dtype
+
+                return with_image_dtype(base_cls, suffix[1:])  # strip leading '_'
+    raise AttributeError(name)
diff --git a/library/src/otx/data/entity/utils.py b/library/src/otx/data/entity/utils.py
index b73043981f1..480500a1b26 100644
--- a/library/src/otx/data/entity/utils.py
+++ b/library/src/otx/data/entity/utils.py
@@ -1,21 +1,183 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Utility functions for OTX data entities."""
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+import struct
+import sys
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
 
+import polars as pl
 import torch
 import torch.utils._pytree as pytree
+from datumaro.experimental.dataset import Sample  # noqa: TC002
+from datumaro.experimental.fields import image_field
 
 if TYPE_CHECKING:
-    from otx.data.entity import ImageInfo
-    from otx.data.entity.sample import OTXSample
+    from collections.abc import Iterable
 
+    from otx.data.entity.base import ImageInfo
 
-def register_pytree_node(cls: type[OTXSample]) -> type[OTXSample]:
+#: Map from IntensityConfig.storage_dtype strings to Polars dtype instances.
+STORAGE_DTYPE_MAP: dict[str, pl.DataType] = {
+    "uint8": pl.UInt8(),
+    "uint16": pl.UInt16(),
+    "int16": pl.Int16(),
+    "float32": pl.Float32(),
+}
+
+#: PIL modes that indicate high-bit-depth (>8-bit) images.
+_PIL_16BIT_MODES = frozenset({"I", "I;16", "I;16B", "I;16L", "I;16N"})
+_PIL_FLOAT_MODES = frozenset({"F"})
+
+#: Magic bytes for image format detection.
+_PNG_SIGNATURE = b"\x89PNG"
+_JPEG_SIGNATURE = b"\xff\xd8"
+_TIFF_LE = b"II"
+_TIFF_BE = b"MM"
+
+
+def detect_image_dtype(image_path: str | Path) -> str:
+    """Detect the storage dtype of an image from its file header.
+
+    Reads **only file metadata** — no pixel data is ever decoded:
+
+    * **PNG**: parses the IHDR chunk (25 bytes) for ``bit_depth``.
+    * **TIFF**: checks the ``BitsPerSample`` tag via PIL header.
+    * **JPEG**: always ``"uint8"`` (JPEG is 8-bit by design).
+    * **Other**: falls back to ``PIL.Image.open().mode`` (header only).
+
+    Args:
+        image_path: Path to a single image file.
+
+    Returns:
+        One of ``"uint8"``, ``"uint16"``, or ``"float32"``.
+    """
+    path = Path(image_path)
+
+    # Read the first 8 bytes to identify the format.
+    with path.open("rb") as f:
+        sig = f.read(8)
+
+    # PNG: IHDR bit_depth is at byte offset 24; a file too short to contain it is treated as 8-bit.
+    if sig[:4] == _PNG_SIGNATURE:
+        with path.open("rb") as f:
+            f.seek(24)  # signature(8) + length(4) + "IHDR"(4) + width(4) + height(4)
+            raw_depth = f.read(1)  # b"" on a truncated file; unpacking that would raise struct.error
+        return "uint16" if raw_depth and struct.unpack("B", raw_depth)[0] == 16 else "uint8"
+
+    # JPEG: always 8-bit
+    if sig[:2] == _JPEG_SIGNATURE:
+        return "uint8"
+
+    # TIFF: check BitsPerSample tag via PIL
+    if sig[:2] in (_TIFF_LE, _TIFF_BE):
+        from PIL import Image
+
+        with Image.open(path) as img:
+            if img.mode in _PIL_16BIT_MODES:
+                return "uint16"
+            if img.mode in _PIL_FLOAT_MODES:
+                return "float32"
+            tag_v2 = getattr(img, "tag_v2", None)
+            if tag_v2 and 258 in tag_v2:  # 258 = BitsPerSample
+                bits = tag_v2[258]
+                if isinstance(bits, tuple):
+                    bits = bits[0]
+                if bits == 16:
+                    return "uint16"
+        return "uint8"
+
+    # Fallback: PIL mode (reliable for grayscale 16-bit)
+    from PIL import Image
+
+    with Image.open(path) as img:
+        mode = img.mode
+    if mode in _PIL_16BIT_MODES:
+        return "uint16"
+    if mode in _PIL_FLOAT_MODES:
+        return "float32"
+    return "uint8"
+
+
+#: Cache for dynamically created sample classes to avoid re-creation.
+_SAMPLE_DTYPE_CACHE: dict[tuple[type, str], type[Sample]] = {}
+
+
+def with_image_dtype(
+    sample_cls: type[Sample],
+    storage_dtype: str,
+) -> type[Sample]:
+    """Create a variant of *sample_cls* whose ``image`` field uses *storage_dtype*.
+
+    When ``storage_dtype == "uint8"`` (the default) the original class is
+    returned unchanged — zero overhead for the common case.
+
+    For other dtypes a thin **dynamic subclass** is created that overrides the
+    ``image`` class-variable with the requested Polars dtype.  The subclass is
+    cached so repeated calls with the same arguments return the same class
+    object (important for Datumaro schema identity comparisons).
+
+    The dynamic class is used during dataset construction
+    (``dm_subset.convert_to_schema(sample_type)``).  It is also registered on
+    the module of *sample_cls* so pickle can resolve it by name; spawned
+    DataLoader workers recreate it via ``__getattr__`` in ``otx.data.entity.sample``.
+
+    Args:
+        sample_cls: One of the concrete sample classes (e.g.
+            :class:`ClassificationSample`, :class:`DetectionSample`).
+        storage_dtype: A key from :data:`STORAGE_DTYPE_MAP` — ``"uint8"``,
+            ``"uint16"``, ``"int16"``, or ``"float32"``.
+
+    Returns:
+        Either *sample_cls* itself (uint8) or a dynamically created subclass
+        with the overridden ``image`` field.
+    """
+    if storage_dtype == "uint8":
+        return sample_cls
+
+    pl_dtype = STORAGE_DTYPE_MAP.get(storage_dtype)
+    if pl_dtype is None:
+        msg = f"Unsupported storage_dtype={storage_dtype!r}. Supported values: {list(STORAGE_DTYPE_MAP)}"
+        raise ValueError(msg)
+
+    cache_key = (sample_cls, storage_dtype)
+    if cache_key in _SAMPLE_DTYPE_CACHE:
+        return _SAMPLE_DTYPE_CACHE[cache_key]
+
+    orig_image = getattr(sample_cls, "image", None)
+    channels_first = getattr(orig_image, "channels_first", True)
+    fmt = getattr(orig_image, "format", "RGB")
+
+    new_image_default = image_field(dtype=pl_dtype, channels_first=channels_first, format=fmt)
+
+    new_cls_name = f"{sample_cls.__name__}_{storage_dtype}"
+    new_cls: type[Sample] = type(  # type: ignore[assignment]
+        new_cls_name,
+        (sample_cls,),
+        {"image": new_image_default},
+    )
+    new_cls.__module__ = sample_cls.__module__
+    new_cls.__qualname__ = new_cls_name
+
+    # Make the class discoverable by pickle in the current process.
+    # In spawned workers the module-level __getattr__ in
+    # otx.data.entity.sample handles the lookup instead.
+    parent_module = sys.modules.get(sample_cls.__module__)
+    if parent_module is not None:
+        setattr(parent_module, new_cls_name, new_cls)
+
+    # Register with pytree so torchvision v2 transforms work
+    register_pytree_node(new_cls)
+
+    _SAMPLE_DTYPE_CACHE[cache_key] = new_cls
+    return new_cls
+
+
+def register_pytree_node(cls: type[Sample]) -> type[Sample]:
     """Decorator to register an OTX data entity with PyTorch's PyTree.
 
     This decorator should be applied to every OTX data entity, as TorchVision V2 transforms
@@ -29,8 +191,26 @@ def register_pytree_node(cls: type[OTXSample]) -> type[OTXSample]:
             class MulticlassClsDataEntity(OTXDataEntity):
                 ...
     """
-    flatten_fn = lambda obj: (list(obj.values()), list(obj.keys()))
-    unflatten_fn = lambda values, context: cls(**dict(zip(context, values)))
+
+    def flatten_fn(obj: object) -> tuple[list[Any], list[str]]:
+        obj_dict = dict(obj.__dict__)
+
+        missing_keys = set(obj.__class__.__annotations__.keys()) - set(obj_dict.keys())
+        for key in missing_keys:
+            obj_dict[key] = getattr(obj, key)
+
+        return (list(obj_dict.values()), list(obj_dict.keys()))
+
+    def unflatten_fn(values: Iterable[Any], context: Any) -> object:  # noqa: ANN401
+        kwargs = dict(zip(context, values))
+        # Extract _img_info to set after construction (since __post_init__ would overwrite it)
+        img_info = kwargs.pop("_img_info", None)
+        obj = cls(**kwargs)
+        # Restore _img_info if it was present (preserves transformed img_info)
+        if img_info is not None:
+            object.__setattr__(obj, "_img_info", img_info)
+        return obj
+
     pytree.register_pytree_node(
         cls,
         flatten_fn=flatten_fn,
diff --git a/library/src/otx/data/entity/validation.py b/library/src/otx/data/entity/validation.py
index 3e075ad2c25..405d2ae763c 100644
--- a/library/src/otx/data/entity/validation.py
+++ b/library/src/otx/data/entity/validation.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Validation functions for OTX data entities."""
@@ -18,35 +18,31 @@
 
 def validate_images(image_batch: torch.Tensor | list[torch.Tensor]) -> None:
     """Validate the image batch."""
-    if not isinstance(image_batch, list) and not isinstance(image_batch, torch.Tensor):
-        msg = f"Image batch must be a torch tensor or list of tensors. Got {type(image_batch)}"
+    if not isinstance(image_batch, (torch.Tensor, list)):
+        msg = f"Image batch must be a torch.Tensor or a list of torch.Tensors. Got {type(image_batch)}"
         raise TypeError(msg)
     if isinstance(image_batch, torch.Tensor):
-        if image_batch.dtype not in (torch.float32, torch.uint8):
-            msg = f"Image batch must have dtype float32 or uint8. Found {image_batch.dtype}"
+        if image_batch.dtype != torch.float32:
+            msg = f"Image batch must have dtype float32. Found {image_batch.dtype}"
             raise ValueError(msg)
         if image_batch.ndim != 4:
-            msg = "Image batch must have 4 dimensions"
+            msg = f"Image batch must have 4 dimensions (BCHW), got {image_batch.ndim}"
             raise ValueError(msg)
         if image_batch.shape[1] not in [1, 3]:
-            msg = "Image batch must have 1 or 3 channels"
+            msg = f"Image batch must have 1 or 3 channels, got {image_batch.shape[1]}"
             raise ValueError(msg)
-    else:
-        if not all(isinstance(image, torch.Tensor) for image in image_batch):
-            msg = "Image batch must be a list of torch tensors"
+    if isinstance(image_batch, list):
+        if not all(isinstance(img, torch.Tensor) for img in image_batch):
+            msg = "All items in image batch list must be torch.Tensors"
             raise TypeError(msg)
-        dtype = image_batch[0].dtype
-        if dtype not in (torch.float32, torch.uint8):
-            msg = "Image batch must have dtype float32 or uint8"
+        if not all(img.dtype == torch.float32 for img in image_batch):
+            msg = "All images in batch must have dtype float32"
             raise ValueError(msg)
-        if not all(image.dtype == dtype for image in image_batch):
-            msg = f"Not all tensors have the same dtype: expected {dtype}"
-            raise TypeError(msg)
-        if not all(image.ndim == 3 for image in image_batch):
-            msg = "Image batch must have 3 dimensions"
+        if not all(img.ndim == 3 for img in image_batch):
+            msg = "All images in batch must have 3 dimensions (CHW)"
             raise ValueError(msg)
-        if not all(image.shape[0] in [1, 3] for image in image_batch):
-            msg = "Image batch must have 1 or 3 channels"
+        if not all(img.shape[0] in [1, 3] for img in image_batch):
+            msg = "All images in batch must have 1 or 3 channels"
             raise ValueError(msg)
 
 
diff --git a/library/src/otx/data/factory.py b/library/src/otx/data/factory.py
index 46f99c3aa6f..c84675fd59f 100644
--- a/library/src/otx/data/factory.py
+++ b/library/src/otx/data/factory.py
@@ -1,15 +1,16 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Factory classes for dataset and transforms."""
 
 from __future__ import annotations
 
+import logging
 from typing import TYPE_CHECKING
 
 from otx.types.task import OTXTaskType
-from otx.types.transformer_libs import TransformLibType
 
+from .augmentation.pipeline import CPUAugmentationPipeline
 from .dataset.base import OTXDataset, Transforms
 
 if TYPE_CHECKING:
@@ -17,6 +18,8 @@
 
     from otx.config.data import SubsetConfig
 
+logger = logging.getLogger(__name__)
+
 
 __all__ = ["OTXDatasetFactory", "TransformLibFactory"]
 
@@ -25,14 +28,24 @@ class TransformLibFactory:
     """Factory class for transform."""
 
     @classmethod
-    def generate(cls: type[TransformLibFactory], config: SubsetConfig) -> Transforms:
-        """Create transforms from factory."""
-        if config.transform_lib_type == TransformLibType.TORCHVISION:
-            from .transform_libs.torchvision import TorchVisionTransformLib
+    def generate(cls: type[TransformLibFactory], config: SubsetConfig) -> Transforms | CPUAugmentationPipeline:
+        """Create transforms from factory.
+
+        Args:
+            config: SubsetConfig with augmentations_cpu.
 
-            return TorchVisionTransformLib.generate(config)
+        Returns:
+            CPUAugmentationPipeline built from config.
+        """
+        if config.augmentations_cpu:
+            # Already a pipeline object (e.g., from from_file method)
+            if isinstance(config.augmentations_cpu, CPUAugmentationPipeline):
+                return config.augmentations_cpu
+            return CPUAugmentationPipeline.from_config(config)
 
-        raise NotImplementedError(config.transform_lib_type)
+        # GPU-only configs may have an empty augmentations_cpu list;
+        # return an identity pipeline so downstream code always gets a valid object.
+        return CPUAugmentationPipeline(augmentations=[])
 
 
 class OTXDatasetFactory:
@@ -49,10 +62,15 @@ def create(
     ) -> OTXDataset:
         """Create OTXDataset."""
         transforms = TransformLibFactory.generate(cfg_subset)
+
+        # Auto-detect storage dtype from the first image's file header.
+        # Reads only metadata (e.g. PNG IHDR), no pixel data is decoded.
+        storage_dtype = cls._detect_storage_dtype(dm_subset)
+
         common_kwargs = {
             "dm_subset": dm_subset,
             "transforms": transforms,
-            "to_tv_image": cfg_subset.to_tv_image,
+            "storage_dtype": storage_dtype,
         }
 
         match task:
@@ -93,3 +111,44 @@ def create(
 
             case _:
                 raise NotImplementedError(task)
+
+    @staticmethod
+    def _detect_storage_dtype(dm_subset: Dataset) -> str:
+        """Detect image storage bit depth from file header or dataset schema.
+
+        First tries to probe the first image's file header via ``detect_image_dtype`` (reads only
+        metadata, no pixel data decoded).  If that is not available (e.g. for
+        parquet-backed datasets with no media paths), falls back to the image
+        field dtype declared in the dataset schema.
+
+        Returns:
+            ``"uint8"``, ``"uint16"``, or ``"float32"``.
+        """
+        import polars as pl
+
+        from otx.data.entity.utils import detect_image_dtype
+
+        # 1. Try file-based detection
+        try:
+            first_item = next(iter(dm_subset))
+            path = getattr(first_item.media, "path", None) if hasattr(first_item, "media") else None
+            if path is not None:
+                return detect_image_dtype(path)
+        except StopIteration:
+            pass
+        except (OSError, ValueError, TypeError) as exc:
+            logger.debug(f"File-based dtype detection failed, falling back to schema: {exc}")
+
+        # 2. Fall back to schema-declared image dtype (parquet datasets)
+        try:
+            img_attr = dm_subset.schema.attributes.get("image")
+            if img_attr is not None and hasattr(img_attr, "field"):
+                dtype = getattr(img_attr.field, "dtype", None)
+                if dtype == pl.UInt16:
+                    return "uint16"
+                if dtype in (pl.Float32, pl.Float64):
+                    return "float32"
+        except (AttributeError, TypeError) as exc:
+            logger.debug(f"Schema-based dtype detection failed: {exc}")
+
+        return "uint8"
diff --git a/library/src/otx/data/module.py b/library/src/otx/data/module.py
index faa6d3aaa2d..6998fbf9fe7 100644
--- a/library/src/otx/data/module.py
+++ b/library/src/otx/data/module.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """LightningDataModule extension for OTX."""
@@ -8,19 +8,18 @@
 import logging
 import multiprocessing
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Sequence
+from typing import TYPE_CHECKING
 
 from datumaro.experimental.export_import import import_dataset
 from datumaro.experimental.fields import Subset
 from lightning import LightningDataModule
 from omegaconf import DictConfig, OmegaConf
 from torch.utils.data import DataLoader, RandomSampler
-from torchvision.transforms.v2 import Normalize
 
 from otx.config.data import SubsetConfig, TileConfig
+from otx.data.augmentation import CPUAugmentationPipeline
 from otx.data.dataset.tile import OTXTileDatasetFactory
 from otx.data.factory import OTXDatasetFactory
-from otx.data.transform_libs.torchvision import Compose, TorchVisionTransformLib
 from otx.data.utils import get_adaptive_num_workers, instantiate_sampler
 from otx.types.device import DeviceType
 from otx.types.label import LabelInfo
@@ -117,8 +116,18 @@ def __init__(
                 if subset_cfg.input_size is None:
                     subset_cfg.input_size = input_size  # type: ignore[assignment]
 
-        # Extract mean and std from Normalize transform
-        self.input_mean, self.input_std = self.extract_normalization_params(self.train_subset.transforms)
+        # Derive mean/std from the CPU pipeline's Normalize transform.
+        # If no Normalize is present (e.g. GPU-only normalization via Kornia),
+        # leave as None so models fall back to their own defaults.
+        # The GPUAugmentationCallback.setup() will later override the model's
+        # mean/std with the GPU pipeline's values if applicable.
+        if getattr(self.train_subset, "augmentations_cpu", None):
+            cpu_pipeline = CPUAugmentationPipeline.from_config(self.train_subset)
+            self.input_mean: tuple[float, float, float] | None = cpu_pipeline.mean
+            self.input_std: tuple[float, float, float] | None = cpu_pipeline.std
+        else:
+            self.input_mean = None
+            self.input_std = None
         self.input_size = input_size
 
         self._setup_otx_dataset(dataset)
@@ -183,55 +192,6 @@ def _setup_otx_dataset(self, dataset: Dataset) -> None:
 
         self.label_info = next(iter(label_infos))
 
-    @classmethod
-    def extract_normalization_params(
-        cls, transforms_source: Sequence[dict[str, Any]] | Compose | None
-    ) -> tuple[tuple[float, float, float], tuple[float, float, float]]:
-        """Extract mean and std from the dataset transforms.
-
-        Specifically, this method looks for a Normalize transform in the provided transforms, and extracts
-        the mean and std values used for normalization.
-        If not found, it returns default values of mean=(0.0, 0.0, 0.0) and std=(1.0, 1.0, 1.0).
-
-        Args:
-            transforms_source: Transforms applied to the dataset.
-                Should be specified as an iterable of transform descriptors (jsonargparse-like) or a Compose object
-
-        Returns:
-            Tuple of (mean, std) tuples.
-        """
-        mean = (0.0, 0.0, 0.0)
-        std = (1.0, 1.0, 1.0)
-
-        if transforms_source is None:
-            return mean, std
-        if hasattr(transforms_source, "__iter__"):
-            transforms_iterable = transforms_source
-        elif isinstance(transforms_source, Compose):
-            transforms_iterable = transforms_source.transforms
-        else:
-            msg = f"Transforms should be given as an iterable or a Compose object, got {type(transforms_source)}"
-            raise TypeError(msg)
-
-        for transform in transforms_iterable:
-            if isinstance(transform, dict) and "Normalize" in transform.get("class_path", ""):
-                # CLI case with jsonargparse
-                mean = transform["init_args"].get("mean", (0.0, 0.0, 0.0))
-                std = transform["init_args"].get("std", (1.0, 1.0, 1.0))
-                break
-
-            if isinstance(transform, Normalize):
-                # torchvision.transforms case
-                mean = transform.mean
-                std = transform.std
-                break
-
-        if len(mean) != 3 or len(std) != 3:
-            msg = f"Expected mean and std to have length 3, got mean={mean}, std={std}"
-            raise ValueError(msg)
-
-        return tuple(mean), tuple(std)  # type: ignore[return-value]
-
     @classmethod
     def from_otx_datasets(
         cls,
@@ -341,9 +301,9 @@ def from_otx_datasets(
             if subset is not None:
                 # Use provided subset config
                 subset_to_assign = subset
-                if subset.transforms:
+                if getattr(subset, "augmentations_cpu", None):
                     logger.warning(
-                        f"The provided {name} SubsetConfig contains transforms which will be overridden "
+                        f"The provided {name} SubsetConfig contains augmentations_cpu which will be overridden "
                         "by the transforms of the provided OTXDataset. When building OTXDataModule from "
                         "pre-constructed datasets, developers should set up the transforms when creating the datasets.",
                     )
@@ -353,16 +313,20 @@ def from_otx_datasets(
                     default_subset_configs = instance.get_default_subset_configs(instance.input_size)
                 subset_to_assign = default_subset_configs[f"{name}_subset"]
 
-            # Override transforms with the ones from the pre-constructed dataset
-            subset_to_assign.transforms = instance.subsets[name].transforms  # type: ignore[assignment]
+            # The pre-constructed datasets already have their transforms configured.
+            # No need to override - just set the subset config.
 
             # Set the 'train_subset', 'val_subset', 'test_subset' attributes
             setattr(instance, f"{name}_subset", subset_to_assign)
 
-        # Extract normalization parameters from train dataset transforms if available
-        instance.input_mean, instance.input_std = instance.extract_normalization_params(
-            instance.train_subset.transforms
-        )
+        # Derive normalization params from the CPU pipeline's Normalize transform if available.
+        if getattr(instance.train_subset, "augmentations_cpu", None):
+            _cpu_pipeline = CPUAugmentationPipeline.from_config(instance.train_subset)
+            instance.input_mean = _cpu_pipeline.mean
+            instance.input_std = _cpu_pipeline.std
+        else:
+            instance.input_mean = None
+            instance.input_std = None
 
         # Save hyperparameters
         instance.save_hyperparameters(
@@ -429,7 +393,6 @@ def get_default_subset_configs(self, input_size: tuple[int, int] | None = None)
                 msg = "input size is not specified in both the config file and the DataModule constructor."
                 raise ValueError(msg)
             subset_config_dict = SubsetConfig(**subset_config_dict)
-            subset_config_dict.transforms = TorchVisionTransformLib.generate(subset_config_dict)
             subset_dicts[subset_key] = subset_config_dict
         return subset_dicts
 
@@ -458,6 +421,7 @@ def train_dataloader(self) -> DataLoader:
             "persistent_workers": config.num_workers > 0,
             "sampler": sampler,
             "shuffle": sampler is None,
+            "prefetch_factor": 2 if config.num_workers > 0 else None,
             "multiprocessing_context": _MP_CONTEXT if config.num_workers > 0 else None,
         }
 
diff --git a/library/src/otx/data/transform_libs/__init__.py b/library/src/otx/data/transform_libs/__init__.py
deleted file mode 100644
index 95e64a90abf..00000000000
--- a/library/src/otx/data/transform_libs/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright (C) 2023-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-"""Helpers to support data transform functions from various frameworks."""
diff --git a/library/src/otx/data/transform_libs/torchvision.py b/library/src/otx/data/transform_libs/torchvision.py
deleted file mode 100644
index 25c4cb7659c..00000000000
--- a/library/src/otx/data/transform_libs/torchvision.py
+++ /dev/null
@@ -1,3094 +0,0 @@
-# Copyright (C) 2023-2025 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-"""Helper to support TorchVision data transform functions."""
-
-from __future__ import annotations
-
-import ast
-import copy
-import math
-import operator
-import typing
-from inspect import isclass
-from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Sequence
-
-import cv2
-import numpy as np
-import PIL.Image
-import torch
-import torchvision.transforms.v2 as tvt_v2
-import typeguard
-from lightning.pytorch.cli import instantiate_class
-from omegaconf import DictConfig
-from scipy.stats import truncnorm
-from torchvision import tv_tensors
-from torchvision._utils import sequence_to_str
-from torchvision.transforms.v2 import GaussianBlur, GaussianNoise
-from torchvision.transforms.v2 import functional as F  # noqa: N812
-
-from otx.data.entity.base import (
-    Points,
-    _crop_image_info,
-    _pad_image_info,
-    _resize_image_info,
-    _resized_crop_image_info,
-)
-from otx.data.entity.sample import (
-    OTXSample,
-)
-from otx.data.transform_libs.utils import (
-    CV2_INTERP_CODES,
-    cache_randomness,
-    centers_bboxes,
-    clip_bboxes,
-    crop_masks,
-    flip_bboxes,
-    flip_image,
-    flip_masks,
-    get_bboxes_from_masks,
-    get_image_shape,
-    is_inside_bboxes,
-    overlap_bboxes,
-    project_bboxes,
-    rescale_bboxes,
-    rescale_keypoints,
-    rescale_masks,
-    scale_size,
-    to_np_image,
-    translate_bboxes,
-    translate_masks,
-)
-from otx.data.utils import import_object_from_module
-
-if TYPE_CHECKING:
-    from otx.config.data import SubsetConfig
-
-
-# mypy: disable-error-code="attr-defined"
-
-RNG = np.random.default_rng(42)
-
-
-def custom_query_size(flat_inputs: list[Any]) -> tuple[int, int]:  # noqa: D103
-    sizes = {
-        tuple(F.get_size(inpt))
-        for inpt in flat_inputs
-        if tvt_v2._utils.check_type(  # noqa: SLF001
-            inpt,
-            (
-                F.is_pure_tensor,
-                tv_tensors.Image,
-                PIL.Image.Image,
-                tv_tensors.Video,
-                tv_tensors.Mask,
-                tv_tensors.BoundingBoxes,
-                Points,
-            ),
-        )
-    }
-    if not sizes:
-        raise TypeError("No image, video, mask, bounding box, or point was found in the sample")  # noqa: EM101, TRY003
-    elif len(sizes) > 1:  # noqa: RET506
-        msg = f"Found multiple HxW dimensions in the sample: {sequence_to_str(sorted(sizes))}"
-        raise ValueError(msg)
-    h, w = sizes.pop()
-    return h, w
-
-
-tvt_v2._utils.query_size = custom_query_size  # noqa: SLF001
-
-
-class NumpytoTVTensorMixin:
-    """Convert numpy to tv tensors."""
-
-    is_numpy_to_tvtensor: bool
-
-    def convert(self, inputs: OTXSample | None) -> OTXSample | None:
-        """Convert numpy to tv tensors."""
-        if self.is_numpy_to_tvtensor and inputs is not None:
-            if (image := getattr(inputs, "image", None)) is not None:
-                if isinstance(image, np.ndarray):
-                    inputs.image = F.to_image(image.copy())
-                elif isinstance(image, torch.Tensor):
-                    # Always wrap in tv_tensors.Image to ensure proper type for torchvision transforms
-                    inputs.image = tv_tensors.Image(image)
-            if (bboxes := getattr(inputs, "bboxes", None)) is not None and isinstance(bboxes, np.ndarray):
-                inputs.bboxes = tv_tensors.BoundingBoxes(bboxes, format="xyxy", canvas_size=inputs.img_info.img_shape)  # type: ignore[attr-defined, union-attr]
-            if (masks := getattr(inputs, "masks", None)) is not None and isinstance(masks, np.ndarray):
-                inputs.masks = tv_tensors.Mask(masks)  # type: ignore[union-attr]
-        return inputs
-
-
-class MinIoURandomCrop(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Implementation of mmdet.datasets.transforms.MinIoURandomCrop with torchvision format.
-
-    Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/datasets/transforms/transforms.py#L1338-L1490
-
-    Args:
-        min_ious (Sequence[float]): minimum IoU threshold for all intersections with bounding boxes.
-        min_crop_size (float): minimum crop's size (i.e. h,w := a*h, a*w, where a >= min_crop_size).
-        bbox_clip_border (bool, optional): Whether clip the objects outside the border of the image. Defaults to True.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-        probability (float): probability of applying this transformation. Defaults to 1.
-    """
-
-    def __init__(
-        self,
-        min_ious: Sequence[float] = (0.1, 0.3, 0.5, 0.7, 0.9),
-        min_crop_size: float = 0.3,
-        bbox_clip_border: bool = True,
-        is_numpy_to_tvtensor: bool = True,
-        probability: float = 1.0,
-    ) -> None:
-        super().__init__()
-        self.min_ious = min_ious
-        self.sample_mode = (1, *min_ious, 0)
-        self.min_crop_size = min_crop_size
-        self.bbox_clip_border = bbox_clip_border
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-        self.prob = probability
-
-    @cache_randomness
-    def _random_mode(self) -> int | float:
-        return RNG.choice(self.sample_mode)
-
-    def forward(self, *_inputs: OTXSample) -> OTXSample | None:
-        """Forward for MinIoURandomCrop."""
-        assert len(_inputs) == 1, "[tmp] Multiple entity is not supported yet."  # noqa: S101
-        inputs = _inputs[0]
-
-        if torch.rand(1) >= self.prob:
-            return self.convert(inputs)
-
-        img: np.ndarray = to_np_image(inputs.image)
-        boxes = inputs.bboxes  # type: ignore[union-attr]
-        h, w, c = img.shape
-        while True:
-            mode = self._random_mode()
-            self.mode = mode
-            if mode == 1:
-                return self.convert(inputs)
-
-            min_iou = self.mode
-            for _ in range(50):
-                new_w = RNG.uniform(self.min_crop_size * w, w)
-                new_h = RNG.uniform(self.min_crop_size * h, h)
-
-                # h / w in [0.5, 2]
-                if new_h / new_w < 0.5 or new_h / new_w > 2:
-                    continue
-
-                left = RNG.uniform(0, w - new_w)
-                top = RNG.uniform(0, h - new_h)
-
-                patch = np.array((int(left), int(top), int(left + new_w), int(top + new_h)))
-                # Line or point crop is not allowed
-                if patch[2] == patch[0] or patch[3] == patch[1]:
-                    continue
-                overlaps = (
-                    overlap_bboxes(torch.as_tensor(patch.reshape(-1, 4).astype(np.float32)), boxes).numpy().reshape(-1)
-                )
-                if len(overlaps) > 0 and overlaps.min() < min_iou:
-                    continue
-
-                # center of boxes should inside the crop img
-                # only adjust boxes and instance masks when the gt is not empty
-                if len(overlaps) > 0:
-                    # adjust boxes
-                    def is_center_of_bboxes_in_patch(boxes: torch.Tensor, patch: np.ndarray) -> np.ndarray:
-                        centers = centers_bboxes(boxes).numpy()
-                        return (
-                            (centers[:, 0] > patch[0])
-                            * (centers[:, 1] > patch[1])
-                            * (centers[:, 0] < patch[2])
-                            * (centers[:, 1] < patch[3])
-                        )
-
-                    mask = is_center_of_bboxes_in_patch(boxes, patch)
-                    if not mask.any():
-                        continue
-                    if (bboxes := getattr(inputs, "bboxes", None)) is not None:
-                        mask = is_center_of_bboxes_in_patch(bboxes, patch)
-                        bboxes = bboxes[mask]
-                        bboxes = translate_bboxes(bboxes, (-patch[0], -patch[1]))
-                        if self.bbox_clip_border:
-                            bboxes = clip_bboxes(bboxes, (patch[3] - patch[1], patch[2] - patch[0]))
-                        inputs.bboxes = tv_tensors.BoundingBoxes(  # type: ignore[union-attr]
-                            bboxes,
-                            format="XYXY",
-                            canvas_size=(patch[3] - patch[1], patch[2] - patch[0]),
-                        )
-
-                        # labels
-                        if inputs.label is not None:
-                            inputs.label = inputs.label[mask]  # type: ignore[union-attr]
-
-                # adjust the img no matter whether the gt is empty before crop
-                img = img[patch[1] : patch[3], patch[0] : patch[2]]
-                inputs.image = img
-                inputs.img_info = _crop_image_info(inputs.img_info, *img.shape[:2])
-                return self.convert(inputs)
-
-    def __repr__(self) -> str:
-        repr_str = self.__class__.__name__
-        repr_str += f"(min_ious={self.min_ious}, "
-        repr_str += f"min_crop_size={self.min_crop_size}, "
-        repr_str += f"bbox_clip_border={self.bbox_clip_border})"
-        return repr_str
-
-
-class Resize(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Implementation of mmdet.datasets.transforms.Resize with torchvision format.
-
-    Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/datasets/transforms/transforms.py#L135-L246
-
-    TODO : optimize logic to torcivision pipeline
-
-    Args:
-        scale (int or tuple): Images scales for resizing with (height, width). Defaults to None
-        scale_factor (float or tuple[float]): Scale factors for resizing with (height, width).
-            Defaults to None.
-        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
-            image. Defaults to False.
-        clip_object_border (bool): Whether to clip the objects
-            outside the border of the image. In some dataset like MOT17, the gt
-            bboxes are allowed to cross the border of images. Therefore, we
-            don't need to clip the gt bboxes in these cases. Defaults to True.
-        interpolation (str): Interpolation method. Defaults to 'bilinear'.
-        interpolation_mask (str): Interpolation method for mask. Defaults to 'nearest'.
-        transform_bbox (bool): Whether to transform bounding boxes. Defaults to False.
-        transform_keypoints (bool): Whether to transform keypoints. Defaults to False.
-        transform_mask (bool): Whether to transform masks. Defaults to False.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-    """
-
-    def __init__(
-        self,
-        scale: int | tuple[int, int] | None = None,  # (H, W)
-        scale_factor: float | tuple[float, float] | None = None,  # (H, W)
-        keep_ratio: bool = False,
-        clip_object_border: bool = True,
-        interpolation: str = "bilinear",
-        interpolation_mask: str = "nearest",
-        transform_bbox: bool = False,
-        transform_keypoints: bool = False,
-        transform_mask: bool = False,
-        is_numpy_to_tvtensor: bool = True,
-        **kwargs,
-    ) -> None:
-        super().__init__()
-
-        assert scale is not None or scale_factor is not None, "`scale` and`scale_factor` can not both be `None`"  # noqa: S101
-
-        if scale is None:
-            self.scale = None
-        elif isinstance(scale, int):
-            self.scale = (scale, scale)
-        else:
-            self.scale = tuple(scale)  # type: ignore[assignment]
-
-        self.transform_bbox = transform_bbox
-        self.transform_keypoints = transform_keypoints
-        self.transform_mask = transform_mask
-        self.interpolation = interpolation
-        self.interpolation_mask = interpolation_mask
-        self.keep_ratio = keep_ratio
-        self.clip_object_border = clip_object_border
-        if scale_factor is None:
-            self.scale_factor = None
-        elif isinstance(scale_factor, float):
-            self.scale_factor = (scale_factor, scale_factor)
-        elif isinstance(scale_factor, tuple) and len(scale_factor) == 2:
-            self.scale_factor = scale_factor
-        else:
-            msg = f"expect scale_factor is float or Tuple(float), butget {type(scale_factor)}"
-            raise TypeError(msg)
-
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-
-    def _resize_img(self, inputs: OTXSample) -> tuple[OTXSample, tuple[float, float] | None]:
-        """Resize images with inputs.img_info.img_shape."""
-        scale_factor: tuple[float, float] | None = getattr(inputs.img_info, "scale_factor", None)  # (H, W)
-        if (img := getattr(inputs, "image", None)) is not None:
-            img = to_np_image(img)
-            img_shape = get_image_shape(img)
-            scale: tuple[int, int] = self.scale or scale_size(
-                img_shape,
-                self.scale_factor,  # type: ignore[arg-type]
-            )  # (H, W)
-
-            if self.keep_ratio:
-                h, w = img.shape[:2]
-                new_scale = min(scale[0] / h, scale[1] / w)
-                img = cv2.resize(
-                    img,
-                    None,
-                    fx=new_scale,
-                    fy=new_scale,
-                    interpolation=CV2_INTERP_CODES[self.interpolation],
-                )
-                scale = img.shape[:2]
-
-            else:
-                img = cv2.resize(img, scale[::-1], interpolation=CV2_INTERP_CODES[self.interpolation])
-
-            inputs.image = img
-            inputs.img_info = _resize_image_info(inputs.img_info, img.shape[:2])
-            inputs.img_info.keep_ratio = self.keep_ratio  # type: ignore[union-attr]
-            scale_factor = (scale[0] / img_shape[0], scale[1] / img_shape[1])
-        return inputs, scale_factor
-
-    def _resize_bboxes(self, inputs: OTXSample, scale_factor: tuple[float, float]) -> OTXSample:
-        """Resize bounding boxes with scale_factor only for `Resize`."""
-        if (bboxes := getattr(inputs, "bboxes", None)) is not None:
-            bboxes = rescale_bboxes(bboxes, scale_factor)
-            if self.clip_object_border:
-                bboxes = clip_bboxes(bboxes, inputs.img_info.img_shape)  # type: ignore[union-attr]
-            inputs.bboxes = tv_tensors.BoundingBoxes(bboxes, format="XYXY", canvas_size=inputs.img_info.img_shape)  # type: ignore[union-attr]
-        return inputs
-
-    def _resize_keypoints(self, inputs: OTXSample, scale_factor: tuple[float, float]) -> OTXSample:
-        """Resize keypoints with scale_factor only for `Resize`."""
-        if inputs.keypoints is not None:  # type: ignore[union-attr]
-            inputs.keypoints[:, :2] = rescale_keypoints(inputs.keypoints[:, :2], scale_factor)  # type: ignore[union-attr]
-        return inputs
-
-    def _resize_masks(self, inputs: OTXSample, scale_factor: tuple[float, float]) -> OTXSample:
-        """Resize masks with scale_factor only for `Resize`."""
-        masks = getattr(inputs, "masks", None)
-        if masks is not None and len(masks) > 0:
-            # bit mask
-            masks = masks.numpy() if not isinstance(masks, np.ndarray) else masks
-            masks = rescale_masks(masks, scale_factor, interpolation=self.interpolation_mask)
-            inputs.masks = masks  # type: ignore[union-attr]
-
-        return inputs
-
-    def forward(self, *_inputs: OTXSample) -> OTXSample | None:
-        """Transform function to resize images, bounding boxes, and masks."""
-        assert len(_inputs) == 1, "[tmp] Multiple entity is not supported yet."  # noqa: S101
-        inputs = _inputs[0]
-        inputs, scale_factor = self._resize_img(inputs)
-        if self.transform_bbox:
-            inputs = self._resize_bboxes(inputs, scale_factor)  # type: ignore[arg-type, assignment]
-
-        if self.transform_keypoints:
-            inputs = self._resize_keypoints(inputs, scale_factor)  # type: ignore[arg-type, assignment]
-
-        if self.transform_mask:
-            inputs = self._resize_masks(inputs, scale_factor)  # type: ignore[arg-type, assignment]
-
-        return self.convert(inputs)
-
-    def __repr__(self) -> str:
-        repr_str = self.__class__.__name__
-        repr_str += f"(scale={self.scale}, "
-        repr_str += f"scale_factor={self.scale_factor}, "
-        repr_str += f"keep_ratio={self.keep_ratio}, "
-        repr_str += f"clip_object_border={self.clip_object_border}, "
-        repr_str += f"interpolation={self.interpolation}, "
-        repr_str += f"interpolation_mask={self.interpolation_mask}, "
-        repr_str += f"transform_bbox={self.transform_bbox}, "
-        repr_str += f"transform_keypoint={self.transform_keypoints}, "
-        repr_str += f"transform_mask={self.transform_mask}, "
-        repr_str += f"is_numpy_to_tvtensor={self.is_numpy_to_tvtensor})"
-        return repr_str
-
-
-class RandomResizedCrop(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Crop the given image to random scale and aspect ratio.
-
-    This class implements mmpretrain.datasets.transforms.RandomResizedCrop reimplemented as torchvision.transform.
-    A crop of random size (default: of 0.08 to 1.0) of the original size and a
-    random aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio
-    is made. This crop is finally resized to given size.
-
-    Args:
-        scale (Sequence[int] | int): Desired output scale of the crop. If size is an
-            int instead of sequence like (h, w), a square crop (size, size) is
-            made.
-        crop_ratio_range (tuple): Range of the random size of the cropped
-            image compared to the original image. Defaults to (0.08, 1.0).
-        aspect_ratio_range (tuple): Range of the random aspect ratio of the
-            cropped image compared to the original image.
-            Defaults to (3. / 4., 4. / 3.).
-        max_attempts (int): Maximum number of attempts before falling back to
-            Central Crop. Defaults to 10.
-        interpolation (str): Interpolation method, accepted values are
-            'nearest', 'bilinear', 'bicubic', 'area', 'lanczos'. Defaults to
-            'bilinear'.
-        transform_mask (bool): Whether to transform masks. Defaults to False.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-    """
-
-    def __init__(
-        self,
-        scale: Sequence[int] | int,
-        crop_ratio_range: tuple[float, float] = (0.08, 1.0),
-        aspect_ratio_range: tuple[float, float] = (3.0 / 4.0, 4.0 / 3.0),
-        max_attempts: int = 10,
-        interpolation: str = "bilinear",
-        transform_mask: bool = False,
-        is_numpy_to_tvtensor: bool = True,
-    ) -> None:
-        super().__init__()
-        if isinstance(scale, Sequence):
-            assert len(scale) == 2  # noqa: S101
-            assert scale[0] > 0  # noqa: S101
-            assert scale[1] > 0  # noqa: S101
-            self.scale = scale
-        else:
-            assert scale > 0  # noqa: S101
-            self.scale = (scale, scale)
-        if (crop_ratio_range[0] > crop_ratio_range[1]) or (aspect_ratio_range[0] > aspect_ratio_range[1]):
-            msg = (
-                "range should be of kind (min, max). "
-                f"But received crop_ratio_range {crop_ratio_range} "
-                f"and aspect_ratio_range {aspect_ratio_range}."
-            )
-            raise ValueError(msg)
-        assert isinstance(max_attempts, int)  # noqa: S101
-        assert max_attempts >= 0, "max_attempts mush be int and no less than 0."  # noqa: S101
-        assert interpolation in (  # noqa: S101
-            "nearest",
-            "bilinear",
-            "bicubic",
-            "area",
-            "lanczos",
-        )
-
-        self.crop_ratio_range = crop_ratio_range
-        self.aspect_ratio_range = aspect_ratio_range
-        self.max_attempts = max_attempts
-        self.interpolation = interpolation
-        self.transform_mask = transform_mask
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-
-    @cache_randomness
-    def rand_crop_params(self, img: np.ndarray) -> tuple[int, int, int, int]:
-        """Get parameters for ``crop`` for a random sized crop.
-
-        Args:
-            img (ndarray): Image to be cropped.
-
-        Returns:
-            tuple: Params (offset_h, offset_w, target_h, target_w) to be
-                passed to `crop` for a random sized crop.
-        """
-        h, w = img.shape[:2]
-        area = h * w
-
-        for _ in range(self.max_attempts):
-            target_area = RNG.uniform(*self.crop_ratio_range) * area
-            log_ratio = (math.log(self.aspect_ratio_range[0]), math.log(self.aspect_ratio_range[1]))
-            aspect_ratio = math.exp(RNG.uniform(*log_ratio))
-            target_w = round(math.sqrt(target_area * aspect_ratio))
-            target_h = round(math.sqrt(target_area / aspect_ratio))
-
-            if 0 < target_w <= w and 0 < target_h <= h:
-                offset_h = RNG.integers(0, h - target_h + 1)
-                offset_w = RNG.integers(0, w - target_w + 1)
-
-                return offset_h, offset_w, target_h, target_w
-
-        # Fallback to central crop
-        in_ratio = float(w) / float(h)
-        if in_ratio < min(self.aspect_ratio_range):
-            target_w = w
-            target_h = round(target_w / min(self.aspect_ratio_range))
-        elif in_ratio > max(self.aspect_ratio_range):
-            target_h = h
-            target_w = round(target_h * max(self.aspect_ratio_range))
-        else:  # whole image
-            target_w = w
-            target_h = h
-        offset_h = (h - target_h) // 2
-        offset_w = (w - target_w) // 2
-        return offset_h, offset_w, target_h, target_w
-
-    def _bbox_clip(self, bboxes: np.ndarray, img_shape: tuple[int, int]) -> np.ndarray:
-        """Clip bboxes to fit the image shape.
-
-        Copy from mmcv.image.geometric.bbox_clip
-
-        Args:
-            bboxes (ndarray): Shape (..., 4*k)
-            img_shape (tuple[int]): (height, width) of the image.
-
-        Returns:
-            ndarray: Clipped bboxes.
-        """
-        cmin = np.empty(bboxes.shape[-1], dtype=bboxes.dtype)
-        cmin[0::2] = img_shape[1] - 1
-        cmin[1::2] = img_shape[0] - 1
-        return np.maximum(np.minimum(bboxes, cmin), 0)
-
-    def _bbox_scaling(self, bboxes: np.ndarray, scale: float, clip_shape: tuple[int, int] | None = None) -> np.ndarray:
-        """Scaling bboxes w.r.t the box center.
-
-        Copy from mmcv.image.geometric.bbox_scaling
-
-        Args:
-            bboxes (ndarray): Shape(..., 4).
-            scale (float): Scaling factor.
-            clip_shape (tuple[int], optional): If specified, bboxes that exceed the
-                boundary will be clipped according to the given shape (h, w).
-
-        Returns:
-            ndarray: Scaled bboxes.
-        """
-        if float(scale) == 1.0:
-            scaled_bboxes = bboxes.copy()
-        else:
-            w = bboxes[..., 2] - bboxes[..., 0] + 1
-            h = bboxes[..., 3] - bboxes[..., 1] + 1
-            dw = (w * (scale - 1)) * 0.5
-            dh = (h * (scale - 1)) * 0.5
-            scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1)
-        if clip_shape is not None:
-            return self._bbox_clip(scaled_bboxes, clip_shape)
-        return scaled_bboxes
-
-    def _crop_img(
-        self,
-        img: np.ndarray,
-        bboxes: np.ndarray,
-        scale: float = 1.0,
-        pad_fill: float | list | None = None,
-    ) -> np.ndarray | list[np.ndarray]:
-        """Crop image patches.
-
-        Copy from mmcv.image.geometric.imcrop
-        3 steps: scale the bboxes -> clip bboxes -> crop and pad.
-
-        Args:
-            img (ndarray): Image to be cropped.
-            bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes.
-            scale (float, optional): Scale ratio of bboxes, the default value
-                1.0 means no scaling.
-            pad_fill (Number | list[Number]): Value to be filled for padding.
-                Default: None, which means no padding.
-
-        Returns:
-            list[ndarray] | ndarray: The cropped image patches.
-        """
-        chn = 1 if img.ndim == 2 else img.shape[2]
-        if pad_fill is not None and isinstance(pad_fill, (int, float)):
-            pad_fill = [pad_fill for _ in range(chn)]
-
-        _bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes
-        scaled_bboxes = self._bbox_scaling(_bboxes, scale).astype(np.int32)
-        clipped_bbox = self._bbox_clip(scaled_bboxes, img.shape)
-
-        patches = []
-        for i in range(clipped_bbox.shape[0]):
-            x1, y1, x2, y2 = tuple(clipped_bbox[i, :])
-            if pad_fill is None:
-                patch = img[y1 : y2 + 1, x1 : x2 + 1, ...]
-            else:
-                _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :])
-                patch_h = _y2 - _y1 + 1
-                patch_w = _x2 - _x1 + 1
-                patch_shape = (patch_h, patch_w) if chn == 1 else (patch_h, patch_w, chn)
-                patch = np.array(pad_fill, dtype=img.dtype) * np.ones(patch_shape, dtype=img.dtype)
-                x_start = 0 if _x1 >= 0 else -_x1
-                y_start = 0 if _y1 >= 0 else -_y1
-                w = x2 - x1 + 1
-                h = y2 - y1 + 1
-                patch[y_start : y_start + h, x_start : x_start + w, ...] = img[y1 : y1 + h, x1 : x1 + w, ...]
-            patches.append(patch)
-
-        if bboxes.ndim == 1:
-            return patches[0]
-        return patches
-
-    def forward(self, *_inputs: OTXSample) -> OTXSample | None:
-        """Transform function to randomly resized crop images and masks."""
-        inputs = _inputs[0]
-        if (img := getattr(inputs, "image", None)) is not None:
-            img = to_np_image(img)
-            offset_h, offset_w, target_h, target_w = self.rand_crop_params(img)
-            bboxes = np.array(
-                [
-                    offset_w,
-                    offset_h,
-                    offset_w + target_w - 1,
-                    offset_h + target_h - 1,
-                ],
-            )
-            img = self._crop_img(img, bboxes=bboxes)
-            inputs.img_info = _crop_image_info(inputs.img_info, *img.shape[:2])
-            img = cv2.resize(
-                img,
-                tuple(self.scale[::-1]),
-                dst=None,
-                interpolation=CV2_INTERP_CODES[self.interpolation],
-            )
-            inputs.image = img
-            inputs.img_info = _resize_image_info(inputs.img_info, img.shape[:2])
-
-            if self.transform_mask and (masks := getattr(inputs, "masks", None)) is not None:
-                masks = to_np_image(masks)
-                masks = self._crop_img(masks, bboxes=bboxes)
-                masks = cv2.resize(
-                    masks,
-                    tuple(self.scale[::-1]),
-                    dst=None,
-                    interpolation=CV2_INTERP_CODES["nearest"],
-                )
-                if masks.ndim == 2:
-                    masks = masks[None]
-                inputs.masks = tv_tensors.Mask(masks)  # type: ignore[union-attr]
-
-        return self.convert(inputs)
-
-    def __repr__(self):
-        """Print the basic information of the transform.
-
-        Returns:
-            str: Formatted string.
-        """
-        repr_str = self.__class__.__name__ + f"(scale={self.scale}"
-        repr_str += ", crop_ratio_range="
-        repr_str += f"{tuple(round(s, 4) for s in self.crop_ratio_range)}"
-        repr_str += ", aspect_ratio_range="
-        repr_str += f"{tuple(round(r, 4) for r in self.aspect_ratio_range)}"
-        repr_str += f", max_attempts={self.max_attempts}"
-        repr_str += f", interpolation={self.interpolation}"
-        repr_str += f", transform_mask={self.transform_mask}"
-        repr_str += f", is_numpy_to_tvtensor={self.is_numpy_to_tvtensor})"
-        return repr_str
-
-
-class EfficientNetRandomCrop(RandomResizedCrop):
-    """EfficientNet style RandomResizedCrop.
-
-    This class implements mmpretrain.datasets.transforms.EfficientNetRandomCrop reimplemented as torchvision.transform.
-
-    Args:
-        scale (int): Desired output scale of the crop. Only int size is
-            accepted, a square crop (size, size) is made.
-        min_covered (Number): Minimum ratio of the cropped area to the original
-             area. Defaults to 0.1.
-        crop_padding (int): The crop padding parameter in efficientnet style
-            center crop. Defaults to 32.
-        crop_ratio_range (tuple): Range of the random size of the cropped
-            image compared to the original image. Defaults to (0.08, 1.0).
-        aspect_ratio_range (tuple): Range of the random aspect ratio of the
-            cropped image compared to the original image.
-            Defaults to (3. / 4., 4. / 3.).
-        max_attempts (int): Maximum number of attempts before falling back to
-            Central Crop. Defaults to 10.
-        interpolation (str): Interpolation method, accepted values are
-            'nearest', 'bilinear', 'bicubic', 'area', 'lanczos'. Defaults to
-            'bicubic'.
-        backend (str): The image resize backend type, accepted values are
-            'cv2' and 'pillow'. Defaults to 'cv2'.
-    """
-
-    def __init__(
-        self,
-        scale: int,
-        min_covered: float = 0.1,
-        crop_padding: int = 32,
-        interpolation: str = "bicubic",
-        **kwarg,
-    ):
-        assert isinstance(scale, int)  # noqa: S101
-        super().__init__(scale, interpolation=interpolation, **kwarg)
-        assert min_covered >= 0, "min_covered should be no less than 0."  # noqa: S101
-        assert crop_padding >= 0, "crop_padding should be no less than 0."  # noqa: S101
-
-        self.min_covered = min_covered
-        self.crop_padding = crop_padding
-
-    # https://github.com/kakaobrain/fast-autoaugment/blob/master/FastAutoAugment/data.py
-    @cache_randomness
-    def rand_crop_params(self, img: np.ndarray) -> tuple[int, int, int, int]:
-        """Get parameters for ``crop`` for a random sized crop.
-
-        Args:
-            img (ndarray): Image to be cropped.
-
-        Returns:
-            tuple: Params (offset_h, offset_w, target_h, target_w) to be
-                passed to `crop` for a random sized crop.
-        """
-        h, w = img.shape[:2]
-        area = h * w
-        min_target_area = self.crop_ratio_range[0] * area
-        max_target_area = self.crop_ratio_range[1] * area
-
-        for _ in range(self.max_attempts):
-            aspect_ratio = RNG.uniform(*self.aspect_ratio_range)
-            min_target_h = round(math.sqrt(min_target_area / aspect_ratio))
-            max_target_h = round(math.sqrt(max_target_area / aspect_ratio))
-
-            if max_target_h * aspect_ratio > w:
-                max_target_h = int((w + 0.5 - 1e-7) / aspect_ratio)
-                if max_target_h * aspect_ratio > w:
-                    max_target_h -= 1
-
-            max_target_h = min(max_target_h, h)
-            min_target_h = min(max_target_h, min_target_h)
-
-            # slightly differs from tf implementation
-            target_h = round(RNG.uniform(min_target_h, max_target_h))
-            target_w = round(target_h * aspect_ratio)
-            target_area = target_h * target_w
-
-            # slight differs from tf. In tf, if target_area > max_target_area,
-            # area will be recalculated
-            if (
-                target_area < min_target_area
-                or target_area > max_target_area
-                or target_w > w
-                or target_h > h
-                or target_area < self.min_covered * area
-            ):
-                continue
-
-            offset_h = RNG.integers(0, h - target_h + 1)
-            offset_w = RNG.integers(0, w - target_w + 1)
-
-            return offset_h, offset_w, target_h, target_w
-
-        # Fallback to central crop
-        img_short = min(h, w)
-        crop_size = self.scale[0] / (self.scale[0] + self.crop_padding) * img_short
-
-        offset_h = max(0, round((h - crop_size) / 2.0))
-        offset_w = max(0, round((w - crop_size) / 2.0))
-        return offset_h, offset_w, crop_size, crop_size
-
-    def __repr__(self):
-        """Print the basic information of the transform.
-
-        Returns:
-            str: Formatted string.
-        """
-        repr_str = super().__repr__()[:-1]
-        repr_str += f", min_covered={self.min_covered}"
-        repr_str += f", crop_padding={self.crop_padding})"
-        return repr_str
-
-
-class RandomFlip(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Implementation of mmdet.datasets.transforms.RandomFlip with torchvision format.
-
-    Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/datasets/transforms/transforms.py#L496-L596
-
-    TODO : optimize logic to torcivision pipeline
-
-     - ``prob`` is float, ``direction`` is string: the image will be
-        ``direction``ly flipped with probability of ``prob`` .
-        E.g., ``prob=0.5``, ``direction='horizontal'``,
-        then image will be horizontally flipped with probability of 0.5.
-     - ``prob`` is float, ``direction`` is list of string: the image will
-        be ``direction[i]``ly flipped with probability of
-        ``prob/len(direction)``.
-        E.g., ``prob=0.5``, ``direction=['horizontal', 'vertical']``,
-        then image will be horizontally flipped with probability of 0.25,
-        vertically with probability of 0.25.
-     - ``prob`` is list of float, ``direction`` is list of string:
-        given ``len(prob) == len(direction)``, the image will
-        be ``direction[i]``ly flipped with probability of ``prob[i]``.
-        E.g., ``prob=[0.3, 0.5]``, ``direction=['horizontal',
-        'vertical']``, then image will be horizontally flipped with
-        probability of 0.3, vertically with probability of 0.5.
-
-    Args:
-        probability (float | list[float], optional): The flipping probability.
-            Defaults to None.
-        direction(str | list[str]): The flipping direction. Options
-            If input is a list, the length must equal ``prob``. Each
-            element in ``prob`` indicates the flip probability of
-            corresponding direction. Defaults to 'horizontal'.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-    """
-
-    def __init__(
-        self,
-        probability: float | Iterable[float] | None = None,
-        direction: str | Sequence[str | None] = "horizontal",
-        is_numpy_to_tvtensor: bool = True,
-    ) -> None:
-        super().__init__()
-
-        if isinstance(probability, list):
-            assert all(isinstance(p, float) for p in probability)  # noqa: S101
-            assert 0 <= sum(probability) <= 1  # noqa: S101
-        elif isinstance(probability, float):
-            assert 0 <= probability <= 1  # noqa: S101
-        else:
-            msg = f"probability must be float or list of float, but got `{type(probability)}`."
-            raise TypeError(msg)
-        self.prob = probability
-
-        valid_directions = ["horizontal", "vertical", "diagonal"]
-        if isinstance(direction, str):
-            assert direction in valid_directions  # noqa: S101
-        elif isinstance(direction, list):
-            assert all(isinstance(d, str) for d in direction)  # noqa: S101
-            assert set(direction).issubset(set(valid_directions))  # noqa: S101
-        else:
-            msg = f"direction must be either str or list of str, but got `{type(direction)}`."
-            raise TypeError(msg)
-        self.direction = direction
-
-        if isinstance(probability, list):
-            assert len(probability) == len(self.direction)  # noqa: S101
-
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-
-    @cache_randomness
-    def _choose_direction(self) -> str:
-        """Choose the flip direction according to `prob` and `direction`."""
-        if isinstance(self.direction, Sequence) and not isinstance(self.direction, str):
-            # None means non-flip
-            direction_list: list = [*list(self.direction), None]
-        elif isinstance(self.direction, str):
-            # None means non-flip
-            direction_list = [self.direction, None]
-
-        if isinstance(self.prob, list):
-            non_prob: float = 1 - sum(self.prob)
-            prob_list = [*self.prob, non_prob]
-        elif isinstance(self.prob, float):
-            non_prob = 1.0 - self.prob
-            # exclude non-flip
-            single_ratio = self.prob / (len(direction_list) - 1)
-            prob_list = [single_ratio] * (len(direction_list) - 1) + [non_prob]
-
-        return RNG.choice(direction_list, p=prob_list)
-
-    def forward(self, *_inputs: OTXSample) -> OTXSample | None:
-        """Flip images, bounding boxes, and semantic segmentation map."""
-        assert len(_inputs) == 1, "[tmp] Multiple entity is not supported yet."  # noqa: S101
-        inputs = _inputs[0]
-
-        if (cur_dir := self._choose_direction()) is not None:
-            # flip image
-            img = to_np_image(inputs.image)
-            img = flip_image(img, direction=cur_dir)
-            # copy is required as flip_image might return a view which is non-contiguous, and thus cannot be converted
-            # to tensor directly
-            inputs.image = img
-            img_shape = get_image_shape(img)
-
-            # flip bboxes
-            if (bboxes := getattr(inputs, "bboxes", None)) is not None:
-                bboxes = flip_bboxes(bboxes, inputs.img_info.img_shape, direction=cur_dir)  # type: ignore[union-attr]
-                inputs.bboxes = tv_tensors.BoundingBoxes(bboxes, format="XYXY", canvas_size=img_shape)  # type: ignore[union-attr]
-
-            # flip masks
-            if (masks := getattr(inputs, "masks", None)) is not None and len(masks) > 0:
-                masks = masks.numpy() if not isinstance(masks, np.ndarray) else masks
-                inputs.masks = np.stack([flip_image(mask, direction=cur_dir) for mask in masks])  # type: ignore[union-attr]
-
-        return self.convert(inputs)
-
-    def __repr__(self) -> str:
-        repr_str = self.__class__.__name__
-        repr_str += f"(prob={self.prob}, "
-        repr_str += f"direction={self.direction}, "
-        repr_str += f"is_numpy_to_tvtensor={self.is_numpy_to_tvtensor})"
-        return repr_str
-
-
-class RandomGaussianBlur(GaussianBlur):
-    """Modified version of the torchvision GaussianBlur."""
-
-    def __init__(
-        self,
-        kernel_size: int | Sequence[int],
-        sigma: int | tuple[float, float] = (0.1, 2.0),
-        probability: float = 0.5,
-    ) -> None:
-        super().__init__(kernel_size=kernel_size, sigma=sigma)
-        self.prob = probability
-
-    def transform(self, inpt: torch.Tensor, params: dict[str, Any]) -> torch.Tensor:
-        """Main transform function."""
-        if self.prob >= RNG.random():
-            return super().transform(inpt, params)
-        return inpt
-
-
-class RandomGaussianNoise(GaussianNoise):
-    """Modified version of the torchvision GaussianNoise.
-
-    This augmentation allows to add gaussian noise to unscaled image.
-    Only float32 images are supported for this augmentation.
-    """
-
-    def __init__(self, mean: float = 0.0, sigma: float = 0.1, clip: bool = True, probability: float = 0.5) -> None:
-        super().__init__(mean=mean, sigma=sigma, clip=clip)
-        self.prob = probability
-
-    def _is_scaled(self, tensor: torch.Tensor) -> bool:
-        return torch.max(tensor) <= 1 + 1e-5
-
-    def forward(self, *_inputs: OTXSample) -> OTXSample:
-        """Main transform function."""
-        assert len(_inputs) == 1, "[tmp] Multiple entity is not supported yet."  # noqa: S101
-        inputs = _inputs[0]
-        if (img := getattr(inputs, "image", None)) is not None and self.prob >= RNG.random():
-            scaled = self._is_scaled(img)
-            sigma = self.sigma * 255 if not scaled else self.sigma
-            mean = self.mean * 255 if not scaled else self.mean
-            clip = False if not scaled else self.clip
-
-            img = self._call_kernel(F.gaussian_noise, img, mean=mean, sigma=sigma, clip=clip)
-            if not scaled:
-                img = torch.clamp(img, 0, 255)
-
-            inputs.image = img
-
-        return inputs
-
-
-class PhotoMetricDistortion(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Implementation of mmdet.datasets.transforms.PhotoMetricDistortion with torchvision format.
-
-    Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/datasets/transforms/transforms.py#L1084-L1210
-
-    TODO : optimize logic to torcivision pipeline
-
-    Apply photometric distortion to image sequentially, every transformation
-    is applied with a probability of 0.5. The position of random contrast is in
-    second or second to last.
-
-    1. random brightness
-    2. random contrast (mode 0)
-    3. convert color from BGR to HSV
-    4. random saturation
-    5. random hue
-    6. convert color from HSV to BGR
-    7. random contrast (mode 1)
-    8. randomly swap channels
-
-    Args:
-        brightness_delta (int): delta of brightness.
-        contrast_range (sequence): range of contrast.
-        saturation_range (sequence): range of saturation.
-        hue_delta (int): delta of hue.
-        probability (float): the probability of applying each transformation.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-    """
-
-    def __init__(
-        self,
-        brightness_delta: int = 32,
-        contrast: Sequence[int | float] = (0.5, 1.5),
-        saturation: Sequence[int | float] = (0.5, 1.5),
-        hue_delta: int = 18,
-        probability: float = 0.5,
-        is_numpy_to_tvtensor: bool = True,
-    ) -> None:
-        super().__init__()
-
-        self.brightness_delta = brightness_delta
-        self.contrast_lower, self.contrast_upper = contrast
-        self.saturation_lower, self.saturation_upper = saturation
-        self.hue_delta = hue_delta
-        self.prob = probability
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-
-    @cache_randomness
-    def _random_flags(self) -> Sequence[int | float]:
-        mode = RNG.random() > self.prob
-        brightness_flag = RNG.random() > self.prob
-        contrast_flag = RNG.random() > self.prob
-        saturation_flag = RNG.random() > self.prob
-        hue_flag = RNG.random() > self.prob
-        swap_flag = RNG.random() > self.prob
-        delta_value = RNG.uniform(-self.brightness_delta, self.brightness_delta)
-        alpha_value = RNG.uniform(self.contrast_lower, self.contrast_upper)
-        saturation_value = RNG.uniform(self.saturation_lower, self.saturation_upper)
-        hue_value = RNG.uniform(-self.hue_delta, self.hue_delta)
-        swap_value = RNG.permutation(3)
-
-        return (
-            mode,
-            brightness_flag,
-            contrast_flag,
-            saturation_flag,
-            hue_flag,
-            swap_flag,
-            delta_value,
-            alpha_value,
-            saturation_value,
-            hue_value,
-            swap_value,
-        )
-
-    def forward(self, *_inputs: OTXSample) -> OTXSample | None:
-        """Transform function to perform photometric distortion on images."""
-        assert len(_inputs) == 1, "[tmp] Multiple entity is not supported yet."  # noqa: S101
-        inputs = _inputs[0]
-
-        if (img := getattr(inputs, "image", None)) is not None:
-            img = to_np_image(img)
-            img = img.astype(np.float32)
-
-            (
-                mode,
-                brightness_flag,
-                contrast_flag,
-                saturation_flag,
-                hue_flag,
-                swap_flag,
-                delta_value,
-                alpha_value,
-                saturation_value,
-                hue_value,
-                swap_value,
-            ) = self._random_flags()
-
-            # random brightness
-            if brightness_flag:
-                img += delta_value
-
-            # mode == 0 --> do random contrast first
-            # mode == 1 --> do random contrast last
-            if mode == 1 and contrast_flag:
-                img *= alpha_value
-
-            # Convert RGB to HSV for color augmentation
-            img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)  # f32 -> f32
-
-            # random saturation
-            if saturation_flag:
-                img[..., 1] *= saturation_value
-                # For image(type=float32), after convert rgb to hsv by opencv,
-                # valid saturation value range is [0, 1]
-                if saturation_value > 1:
-                    img[..., 1] = img[..., 1].clip(0, 1)
-
-            # random hue
-            if hue_flag:
-                img[..., 0] += hue_value
-                img[..., 0][img[..., 0] > 360] -= 360
-                img[..., 0][img[..., 0] < 0] += 360
-
-            # Convert HSV back to RGB
-            img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB)  # f32 -> f32
-
-            # random contrast
-            if mode == 0 and contrast_flag:
-                img *= alpha_value
-
-            # randomly swap channels
-            if swap_flag:
-                img = img[..., swap_value]
-
-            inputs.image = img
-        return self.convert(inputs)
-
-    def __repr__(self) -> str:
-        repr_str = self.__class__.__name__
-        repr_str += f"(brightness_delta={self.brightness_delta}, "
-        repr_str += "contrast_range="
-        repr_str += f"{(self.contrast_lower, self.contrast_upper)}, "
-        repr_str += "saturation_range="
-        repr_str += f"{(self.saturation_lower, self.saturation_upper)}, "
-        repr_str += f"hue_delta={self.hue_delta}, "
-        repr_str += f"is_numpy_to_tvtensor={self.is_numpy_to_tvtensor})"
-        return repr_str
-
-
-class RandomAffine(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Implementation of mmdet.datasets.transforms.RandomAffine with torchvision format.
-
-    Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/datasets/transforms/transforms.py#L2736-L2901
-
-    RandomAffine supports images, bounding boxes, and masks.
-
-    TODO : optimize logic to torcivision pipeline
-
-    Args:
-        max_rotate_degree (float): Maximum degrees of rotation transform.
-            Defaults to 10.
-        max_translate_ratio (float): Maximum ratio of translation.
-            Defaults to 0.1.
-        scaling_ratio_range (tuple[float, float]): Min and max ratio of
-            scaling transform. Defaults to (0.5, 1.5).
-        max_shear_degree (float): Maximum degrees of shear
-            transform. Defaults to 2.
-        border (tuple[int, int]): Distance from height and width sides of input
-            image to adjust output shape. Only used in mosaic dataset.
-            Defaults to (0, 0).
-        border_val (tuple[int, int, int]): Border padding values of 3 channels.
-            Defaults to (114, 114, 114).
-        bbox_clip_border (bool, optional): Whether to clip the objects outside
-            the border of the image. In some dataset like MOT17, the gt bboxes
-            are allowed to cross the border of images. Therefore, we don't
-            need to clip the gt bboxes in these cases. Defaults to True.
-        transform_mask (bool): Whether to transform the mask. Defaults to True.
-        mask_fill_value (int): Fill value for mask. Defaults to 0.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-    """
-
-    def __init__(
-        self,
-        max_rotate_degree: float = 10.0,
-        max_translate_ratio: float = 0.1,
-        scaling_ratio_range: tuple[float, float] = (0.5, 1.5),
-        max_shear_degree: float = 2.0,
-        border: tuple[int, int] = (0, 0),  # (H, W)
-        border_val: tuple[int, int, int] = (114, 114, 114),
-        bbox_clip_border: bool = True,
-        transform_mask: bool = True,
-        recompute_bbox: bool = True,
-        mask_fill_value: int = 0,
-        is_numpy_to_tvtensor: bool = True,
-    ) -> None:
-        super().__init__()
-        self._validate_parameters(max_translate_ratio, scaling_ratio_range)
-        self.max_rotate_degree = max_rotate_degree
-        self.max_translate_ratio = max_translate_ratio
-        self.scaling_ratio_range = scaling_ratio_range
-        self.max_shear_degree = max_shear_degree
-        self.border = border  # (H, W)
-        self.border_val = border_val
-        self.bbox_clip_border = bbox_clip_border
-        self.transform_mask = transform_mask
-        self.recompute_bbox = recompute_bbox
-        self.mask_fill_value = mask_fill_value
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-
-    @staticmethod
-    def _validate_parameters(max_translate_ratio: float, scaling_ratio_range: tuple[float, float]) -> None:
-        """Validate input parameters."""
-        if not 0 <= max_translate_ratio <= 1:
-            msg = f"max_translate_ratio must be between 0 and 1, got {max_translate_ratio}"
-            raise ValueError(msg)
-        if scaling_ratio_range[0] > scaling_ratio_range[1]:
-            msg = f"scaling_ratio_range[0] must be <= scaling_ratio_range[1], got {scaling_ratio_range}"
-            raise ValueError(msg)
-        if scaling_ratio_range[0] <= 0:
-            msg = f"scaling_ratio_range[0] must be > 0, got {scaling_ratio_range[0]}"
-            raise ValueError(msg)
-
-    @cache_randomness
-    def _get_random_homography_matrix(self, height: int, width: int) -> np.ndarray:
-        """Generate random homography matrix for affine transformation.
-
-        Args:
-            height (int): Image height including border.
-            width (int): Image width including border.
-
-        Returns:
-            np.ndarray: 3x3 homography matrix.
-        """
-        # Generate transformation parameters
-        rotation_degree = RNG.uniform(-self.max_rotate_degree, self.max_rotate_degree)
-        scaling_ratio = RNG.uniform(self.scaling_ratio_range[0], self.scaling_ratio_range[1])
-        x_shear_degree = RNG.uniform(-self.max_shear_degree, self.max_shear_degree)
-        y_shear_degree = RNG.uniform(-self.max_shear_degree, self.max_shear_degree)
-        trans_x = RNG.uniform(-self.max_translate_ratio, self.max_translate_ratio) * width
-        trans_y = RNG.uniform(-self.max_translate_ratio, self.max_translate_ratio) * height
-
-        # Create transformation matrices
-        rotation_matrix = self._get_rotation_matrix(rotation_degree)
-        scaling_matrix = self._get_scaling_matrix(scaling_ratio)
-        shear_matrix = self._get_shear_matrix(x_shear_degree, y_shear_degree)
-        translate_matrix = self._get_translation_matrix(trans_x, trans_y)
-
-        # Combine transformations: T * Sh * R * S
-        return translate_matrix @ shear_matrix @ rotation_matrix @ scaling_matrix
-
-    def forward(self, *_inputs: OTXSample) -> OTXSample:
-        """Forward pass of RandomAffine transform.
-
-        Args:
-            inputs: Input data containing image and annotations.
-
-        Returns:
-            Transformed data item or original input if no valid annotations remain.
-
-        Raises:
-            ValueError: If inputs format is invalid.
-        """
-        if len(_inputs) != 1:
-            msg = f"RandomAffine can only transform single input, got {len(_inputs)}"
-            raise ValueError(msg)
-
-        inputs = _inputs[0]
-        img = to_np_image(inputs.image)
-
-        # Get random homography matrix for affine transformation
-        height, width = img.shape[:2]  # type: ignore[union-attr]
-        homography_matrix = self._get_random_homography_matrix(height, width)
-        output_shape = (height + self.border[0] * 2, width + self.border[1] * 2)
-
-        transformed_img = self._warp_image(img, homography_matrix, output_shape)
-        inputs.image = transformed_img
-        inputs.img_info = _resize_image_info(inputs.img_info, transformed_img.shape[:2])
-        valid_index = None
-        valid_bboxes = hasattr(inputs, "bboxes") and inputs.bboxes is not None and len(inputs.bboxes) > 0
-
-        if valid_bboxes:
-            # Test transform bboxes to see if any remain valid
-            valid_index = self._transform_bboxes(inputs, homography_matrix, output_shape)
-            # If no valid annotations will remain after transformation, skip entirely
-            if not valid_index.any():
-                inputs.image = img
-                return self.convert(inputs)  # type: ignore[return-value]
-
-        if hasattr(inputs, "masks") and inputs.masks is not None and len(inputs.masks) > 0:
-            self._transform_masks(inputs, homography_matrix, output_shape, valid_index)
-
-        if valid_bboxes and self.recompute_bbox:
-            self._recompute_bboxes(inputs, output_shape)
-
-        return self.convert(inputs)  # type: ignore[return-value]
-
-    def _warp_image(
-        self,
-        image: np.ndarray,
-        homography_matrix: np.ndarray,
-        output_shape: tuple[int, int],
-    ) -> np.ndarray:
-        """Warp image using the homography matrix.
-
-        Args:
-            image: Input image.
-            homography_matrix: Homography matrix.
-            output_shape: Output shape (height, width).
-
-        Returns:
-            np.ndarray: Warped image.
-        """
-        height, width = output_shape
-        return cv2.warpPerspective(image, homography_matrix, dsize=(width, height), borderValue=self.border_val)
-
-    def _transform_bboxes(
-        self,
-        inputs: OTXSample,
-        warp_matrix: np.ndarray,
-        output_shape: tuple[int, int],
-    ) -> np.ndarray:
-        """Transform bounding boxes and return valid indices.
-
-        Args:
-            inputs: Input data item.
-            warp_matrix: Transformation matrix.
-            output_shape: Output image shape (height, width).
-
-        Returns:
-            np.ndarray: Boolean array indicating valid bboxes.
-        """
-        bboxes = project_bboxes(inputs.bboxes, warp_matrix)
-
-        if self.bbox_clip_border:
-            bboxes = clip_bboxes(bboxes, output_shape)
-
-        # Get valid indices and filter
-        valid_index = is_inside_bboxes(bboxes, output_shape)
-
-        if valid_index.any():
-            inputs.bboxes = tv_tensors.BoundingBoxes(
-                bboxes[valid_index],
-                format="XYXY",
-                canvas_size=output_shape,
-            )
-            inputs.label = inputs.label[valid_index]  # type: ignore[index]
-
-        return valid_index
-
-    def _transform_masks(
-        self,
-        inputs: OTXSample,
-        warp_matrix: np.ndarray,
-        output_size: tuple[int, int],
-        valid_index: np.ndarray | None = None,
-    ) -> None:
-        """Transform masks using the warp matrix.
-
-        Args:
-            inputs: Input data item.
-            warp_matrix: Transformation matrix.
-            output_size: Output size (width, height).
-            valid_index: Boolean array indicating valid objects.
-        """
-        if not self.transform_mask or not hasattr(inputs, "masks") or inputs.masks is None or len(inputs.masks) == 0:
-            return
-
-        # Convert valid_index to numpy boolean array if it's a tensor
-        if valid_index is not None and hasattr(valid_index, "numpy"):
-            valid_index = valid_index.numpy()
-
-        # Filter masks using valid_index first
-        masks = inputs.masks[valid_index] if valid_index is not None else inputs.masks
-        masks = masks.numpy() if not isinstance(masks, np.ndarray) else masks
-
-        if masks.ndim == 3:
-            masks = list(masks)
-
-        transformed_masks = []
-        for mask in masks:
-            transformed_mask = self._warp_single_mask(mask, warp_matrix, output_size)
-            transformed_masks.append(transformed_mask)
-
-        if transformed_masks:
-            masks_array = np.stack(transformed_masks).astype(np.uint8)
-            inputs.masks = tv_tensors.Mask(torch.from_numpy(masks_array > 0).to(torch.bool))
-
-    def _warp_single_mask(self, mask: np.ndarray, warp_matrix: np.ndarray, output_size: tuple[int, int]) -> np.ndarray:
-        """Warp a single mask using appropriate interpolation.
-
-        Args:
-            mask: Input mask.
-            warp_matrix: Transformation matrix.
-            output_size: Output size (width, height).
-
-        Returns:
-            np.ndarray: Warped mask.
-        """
-        unique_values = np.unique(mask)
-        height, width = output_size
-
-        # Binary mask: use 255/127 threshold for cleaner results
-        if len(unique_values) <= 2 and np.max(unique_values) <= 1:
-            warped_mask = cv2.warpPerspective(
-                mask.astype(np.uint8) * 255,
-                warp_matrix,
-                dsize=(width, height),
-                borderValue=0,
-            )
-            return warped_mask > 127
-
-        return cv2.warpPerspective(
-            mask.astype(np.uint8),
-            warp_matrix,
-            dsize=(width, height),
-            flags=cv2.INTER_NEAREST,
-            borderValue=0,
-        )
-
-    def _recompute_bboxes(self, inputs: OTXSample, output_shape: tuple[int, int]) -> None:
-        """Recomputes the bounding boxes after tranforming from the mask if available.
-
-        Args:
-            inputs: Input data item.
-            output_shape: Output shape (height, width).
-        """
-        has_masks = hasattr(inputs, "masks") and inputs.masks is not None and len(inputs.masks) > 0
-
-        if not has_masks:
-            return
-
-        # bboxes here are XYXY format
-        bboxes = inputs.bboxes
-        bboxes = bboxes.numpy() if not isinstance(bboxes, np.ndarray) else bboxes  # type: ignore[union-attr]
-
-        masks = inputs.masks
-        masks = masks.numpy() if not isinstance(masks, np.ndarray) else masks  # type: ignore[union-attr]
-        for i, mask in enumerate(masks):
-            points = cv2.findNonZero(mask.astype(np.uint8))
-            if points is not None:
-                x, y, w, h = cv2.boundingRect(points)
-                bboxes[i] = np.array([x, y, x + w, y + h])
-
-        inputs.bboxes = tv_tensors.BoundingBoxes(
-            bboxes,
-            format="XYXY",
-            canvas_size=output_shape,
-        )
-
-    def __repr__(self) -> str:
-        """Return string representation of the transform."""
-        params = [
-            f"max_rotate_degree={self.max_rotate_degree}",
-            f"max_translate_ratio={self.max_translate_ratio}",
-            f"scaling_ratio_range={self.scaling_ratio_range}",
-            f"max_shear_degree={self.max_shear_degree}",
-            f"border={self.border}",
-            f"border_val={self.border_val}",
-            f"bbox_clip_border={self.bbox_clip_border}",
-            f"transform_mask={self.transform_mask}",
-            f"mask_fill_value={self.mask_fill_value}",
-            f"is_numpy_to_tvtensor={self.is_numpy_to_tvtensor}",
-        ]
-        return f"{self.__class__.__name__}({', '.join(params)})"
-
-    @staticmethod
-    def _get_rotation_matrix(rotate_degrees: float) -> np.ndarray:
-        """Create rotation transformation matrix.
-
-        Args:
-            rotate_degrees: Rotation angle in degrees.
-
-        Returns:
-            np.ndarray: 3x3 rotation matrix.
-        """
-        radian = math.radians(rotate_degrees)
-        cos_val, sin_val = np.cos(radian), np.sin(radian)
-        return np.array(
-            [
-                [cos_val, -sin_val, 0.0],
-                [sin_val, cos_val, 0.0],
-                [0.0, 0.0, 1.0],
-            ],
-            dtype=np.float32,
-        )
-
-    @staticmethod
-    def _get_scaling_matrix(scale_ratio: float) -> np.ndarray:
-        """Create scaling transformation matrix.
-
-        Args:
-            scale_ratio: Scaling factor.
-
-        Returns:
-            np.ndarray: 3x3 scaling matrix.
-        """
-        return np.array(
-            [
-                [scale_ratio, 0.0, 0.0],
-                [0.0, scale_ratio, 0.0],
-                [0.0, 0.0, 1.0],
-            ],
-            dtype=np.float32,
-        )
-
-    @staticmethod
-    def _get_shear_matrix(x_shear_degrees: float, y_shear_degrees: float) -> np.ndarray:
-        """Create shear transformation matrix.
-
-        Args:
-            x_shear_degrees: Shear angle in x direction (degrees).
-            y_shear_degrees: Shear angle in y direction (degrees).
-
-        Returns:
-            np.ndarray: 3x3 shear matrix.
-        """
-        x_radian = math.radians(x_shear_degrees)
-        y_radian = math.radians(y_shear_degrees)
-        return np.array(
-            [
-                [1, np.tan(x_radian), 0.0],
-                [np.tan(y_radian), 1, 0.0],
-                [0.0, 0.0, 1.0],
-            ],
-            dtype=np.float32,
-        )
-
-    @staticmethod
-    def _get_translation_matrix(x: float, y: float) -> np.ndarray:
-        """Create translation transformation matrix.
-
-        Args:
-            x: Translation in x direction.
-            y: Translation in y direction.
-
-        Returns:
-            np.ndarray: 3x3 translation matrix.
-        """
-        return np.array(
-            [
-                [1, 0.0, x],
-                [0.0, 1, y],
-                [0.0, 0.0, 1.0],
-            ],
-            dtype=np.float32,
-        )
-
-
-class CachedMosaic(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Implementation of mmdet.datasets.transforms.CachedMosaic with torchvision format.
-
-    Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/datasets/transforms/transforms.py#L3342-L3573
-
-    TODO : optimize logic to torcivision pipeline
-
-    Args:
-        img_scale (Sequence[int]): Image size before mosaic pipeline of single
-            image. The shape order should be (height, width).
-            Defaults to (640, 640).
-        center_ratio_range (tuple[float]): Center ratio range of mosaic
-            output. Defaults to (0.5, 1.5).
-        bbox_clip_border (bool, optional): Whether to clip the objects outside
-            the border of the image. In some dataset like MOT17, the gt bboxes
-            are allowed to cross the border of images. Therefore, we don't
-            need to clip the gt bboxes in these cases. Defaults to True.
-        pad_val (float): Pad value. Defaults to 114.0.
-        probability (float): Probability of applying this transformation.
-            Defaults to 1.0.
-        max_cached_images (int): The maximum length of the cache. The larger
-            the cache, the stronger the randomness of this transform. As a
-            rule of thumb, providing 10 caches for each image suffices for
-            randomness. Defaults to 40.
-        random_pop (bool): Whether to randomly pop a result from the cache
-            when the cache is full. If set to False, use FIFO popping method.
-            Defaults to True.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-    """
-
-    def __init__(
-        self,
-        img_scale: tuple[int, int] | list[int] = (640, 640),  # (H, W)
-        center_ratio_range: tuple[float, float] = (0.5, 1.5),
-        bbox_clip_border: bool = True,
-        pad_val: float = 114.0,
-        probability: float = 1.0,
-        max_cached_images: int = 40,
-        random_pop: bool = True,
-        is_numpy_to_tvtensor: bool = True,
-    ) -> None:
-        super().__init__()
-
-        assert isinstance(img_scale, (tuple, list))  # noqa: S101
-        assert 0 <= probability <= 1.0, f"The probability should be in range [0,1]. got {probability}."  # noqa: S101
-
-        self.img_scale = img_scale  # (H, W)
-        self.center_ratio_range = center_ratio_range
-        self.bbox_clip_border = bbox_clip_border
-        self.pad_val = pad_val
-        self.prob = probability
-
-        self.results_cache: list[OTXSample] = []  # type: ignore[valid-type]
-        self.random_pop = random_pop
-        assert max_cached_images >= 4, f"The length of cache must >= 4, but got {max_cached_images}."  # noqa: S101
-        self.max_cached_images = max_cached_images
-
-        self.cnt_cached_images = 0
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-
-    @cache_randomness
-    def get_indexes(self, cache: list) -> list:
-        """Call function to collect indexes.
-
-        Args:
-            cache (list): The results cache.
-
-        Returns:
-            list: indexes.
-        """
-        return [RNG.integers(0, len(cache) - 1) for _ in range(3)]
-
-    @typing.no_type_check  # TODO(ashwinvaidya17): temporary
-    def forward(self, *_inputs: OTXSample) -> OTXSample | None:
-        """Forward for CachedMosaic."""
-        assert len(_inputs) == 1, "[tmp] Multiple entity is not supported yet."  # noqa: S101
-        inputs = _inputs[0]
-
-        self.results_cache.append(copy.deepcopy(inputs))
-        if len(self.results_cache) > self.max_cached_images:
-            index = RNG.integers(0, len(self.results_cache) - 1) if self.random_pop else 0
-            self.results_cache.pop(index)
-
-        if len(self.results_cache) <= 4:
-            return self.convert(inputs)
-
-        if RNG.uniform(0, 1) > self.prob:
-            return self.convert(inputs)
-
-        indices = self.get_indexes(self.results_cache)
-        mix_results = [copy.deepcopy(self.results_cache[i]) for i in indices]
-
-        # TODO (mmdetection): refactor mosaic to reuse these code.
-        # https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/datasets/transforms/transforms.py#L3465
-        mosaic_bboxes = []
-        mosaic_bboxes_labels = []
-        mosaic_masks = []
-        with_mask = bool(hasattr(inputs, "masks"))
-
-        inp_img: np.ndarray = to_np_image(inputs.image)
-        if len(inp_img.shape) == 3:
-            mosaic_img = np.full(
-                (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2), 3),
-                self.pad_val,
-                dtype=inp_img.dtype,
-            )
-        else:
-            mosaic_img = np.full(
-                (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2)),
-                self.pad_val,
-                dtype=inp_img.dtype,
-            )
-
-        # mosaic center x, y
-        center_x = int(RNG.uniform(*self.center_ratio_range) * self.img_scale[1])
-        center_y = int(RNG.uniform(*self.center_ratio_range) * self.img_scale[0])
-        center_position = (center_x, center_y)
-
-        loc_strs = ("top_left", "top_right", "bottom_left", "bottom_right")
-        for i, loc in enumerate(loc_strs):
-            results_patch = copy.deepcopy(inputs) if loc == "top_left" else copy.deepcopy(mix_results[i - 1])
-
-            img_i: np.ndarray = to_np_image(results_patch.image)
-            h_i, w_i = img_i.shape[:2]
-            # keep_ratio resize
-            scale_ratio_i = min(self.img_scale[0] / h_i, self.img_scale[1] / w_i)
-            img_i = cv2.resize(
-                img_i,
-                (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i)),
-                interpolation=cv2.INTER_LINEAR,
-            )
-
-            # compute the combine parameters
-            paste_coord, crop_coord = self._mosaic_combine(loc, center_position, img_i.shape[:2][::-1])
-            x1_p, y1_p, x2_p, y2_p = paste_coord
-            x1_c, y1_c, x2_c, y2_c = crop_coord
-
-            # crop and paste image
-            mosaic_img[y1_p:y2_p, x1_p:x2_p] = img_i[y1_c:y2_c, x1_c:x2_c]
-
-            # adjust coordinate
-            gt_bboxes_i = results_patch.bboxes
-            gt_bboxes_labels_i = results_patch.label
-
-            padw = x1_p - x1_c
-            padh = y1_p - y1_c
-            gt_bboxes_i = rescale_bboxes(gt_bboxes_i, (scale_ratio_i, scale_ratio_i))
-            gt_bboxes_i = translate_bboxes(gt_bboxes_i, (padw, padh))
-            mosaic_bboxes.append(gt_bboxes_i)
-            mosaic_bboxes_labels.append(gt_bboxes_labels_i)
-            if with_mask and (gt_masks_i := getattr(results_patch, "masks", None)) is not None and len(gt_masks_i) > 0:
-                gt_masks_i = gt_masks_i.numpy() if not isinstance(gt_masks_i, np.ndarray) else gt_masks_i
-                gt_masks_i = rescale_masks(gt_masks_i, float(scale_ratio_i))
-                gt_masks_i = translate_masks(
-                    gt_masks_i,
-                    out_shape=(int(self.img_scale[0] * 2), int(self.img_scale[1] * 2)),
-                    offset=padw,
-                    direction="horizontal",
-                )
-                gt_masks_i = translate_masks(
-                    gt_masks_i,
-                    out_shape=(int(self.img_scale[0] * 2), int(self.img_scale[1] * 2)),
-                    offset=padh,
-                    direction="vertical",
-                )
-                mosaic_masks.append(gt_masks_i)
-
-        mosaic_bboxes = torch.cat(mosaic_bboxes, dim=0)
-        mosaic_bboxes_labels = torch.cat(mosaic_bboxes_labels, dim=0)
-
-        if self.bbox_clip_border:
-            mosaic_bboxes = clip_bboxes(mosaic_bboxes, (2 * self.img_scale[0], 2 * self.img_scale[1]))
-
-        # remove outside bboxes
-        inside_inds = is_inside_bboxes(mosaic_bboxes, (2 * self.img_scale[0], 2 * self.img_scale[1])).numpy()
-        mosaic_bboxes = mosaic_bboxes[inside_inds]
-        mosaic_bboxes_labels = mosaic_bboxes_labels[inside_inds]
-
-        inputs.image = mosaic_img
-        inputs.img_info = _resized_crop_image_info(
-            inputs.img_info,
-            mosaic_img.shape[:2],
-        )  # TODO (sungchul): need to add proper function
-
-        inputs.bboxes = tv_tensors.BoundingBoxes(mosaic_bboxes, format="XYXY", canvas_size=mosaic_img.shape[:2])
-        inputs.label = mosaic_bboxes_labels
-        if with_mask and len(mosaic_masks) > 0:
-            inputs.masks = np.concatenate(mosaic_masks, axis=0)[inside_inds]
-        return self.convert(inputs)
-
-    def _mosaic_combine(
-        self,
-        loc: str,
-        center_position_xy: Sequence[float],
-        img_shape_wh: Sequence[int],
-    ) -> tuple[tuple[int, ...], tuple[int, ...]]:
-        """Calculate global coordinate of mosaic image and local coordinate of cropped sub-image.
-
-        Args:
-            loc (str): Index for the sub-image, loc in ("top_left",
-              "top_right", "bottom_left", "bottom_right").
-            center_position_xy (Sequence[float]): Mixing center for 4 images,
-                (x, y).
-            img_shape_wh (Sequence[int]): Width and height of sub-image
-
-        Returns:
-            tuple[tuple[int]]: Corresponding coordinate of pasting and
-                cropping
-                - paste_coord (tuple): paste corner coordinate in mosaic image.
-                - crop_coord (tuple): crop corner coordinate in mosaic image.
-        """
-        assert loc in ("top_left", "top_right", "bottom_left", "bottom_right")  # noqa: S101
-        if loc == "top_left":
-            # index0 to top left part of image
-            x1, y1, x2, y2 = map(
-                int,
-                (
-                    max(center_position_xy[0] - img_shape_wh[0], 0),
-                    max(center_position_xy[1] - img_shape_wh[1], 0),
-                    center_position_xy[0],
-                    center_position_xy[1],
-                ),
-            )
-            crop_coord = img_shape_wh[0] - (x2 - x1), img_shape_wh[1] - (y2 - y1), img_shape_wh[0], img_shape_wh[1]
-
-        elif loc == "top_right":
-            # index1 to top right part of image
-            x1, y1, x2, y2 = map(
-                int,
-                (
-                    center_position_xy[0],
-                    max(center_position_xy[1] - img_shape_wh[1], 0),
-                    min(center_position_xy[0] + img_shape_wh[0], self.img_scale[1] * 2),
-                    center_position_xy[1],
-                ),
-            )
-            crop_coord = 0, img_shape_wh[1] - (y2 - y1), min(img_shape_wh[0], x2 - x1), img_shape_wh[1]
-
-        elif loc == "bottom_left":
-            # index2 to bottom left part of image
-            x1, y1, x2, y2 = map(
-                int,
-                (
-                    max(center_position_xy[0] - img_shape_wh[0], 0),
-                    center_position_xy[1],
-                    center_position_xy[0],
-                    min(self.img_scale[0] * 2, center_position_xy[1] + img_shape_wh[1]),
-                ),
-            )
-            crop_coord = img_shape_wh[0] - (x2 - x1), 0, img_shape_wh[0], min(y2 - y1, img_shape_wh[1])
-
-        else:
-            # index3 to bottom right part of image
-            x1, y1, x2, y2 = map(
-                int,
-                (
-                    center_position_xy[0],
-                    center_position_xy[1],
-                    min(center_position_xy[0] + img_shape_wh[0], self.img_scale[1] * 2),
-                    min(self.img_scale[0] * 2, center_position_xy[1] + img_shape_wh[1]),
-                ),
-            )
-            crop_coord = 0, 0, min(img_shape_wh[0], x2 - x1), min(y2 - y1, img_shape_wh[1])
-
-        paste_coord = x1, y1, x2, y2
-        return paste_coord, crop_coord
-
-    def __repr__(self):
-        repr_str = self.__class__.__name__
-        repr_str += f"(img_scale={self.img_scale}, "
-        repr_str += f"center_ratio_range={self.center_ratio_range}, "
-        repr_str += f"pad_val={self.pad_val}, "
-        repr_str += f"prob={self.prob}, "
-        repr_str += f"max_cached_images={self.max_cached_images}, "
-        repr_str += f"random_pop={self.random_pop}, "
-        repr_str += f"is_numpy_to_tvtensor={self.is_numpy_to_tvtensor})"
-        return repr_str
-
-
-class CachedMixUp(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Implementation of mmdet.datasets.transforms.CachedMixup with torchvision format.
-
-    Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/datasets/transforms/transforms.py#L3577-L3854
-
-    TODO : optimize logic to torcivision pipeline
-
-    Args:
-        img_scale (Sequence[int]): Image output size after mixup pipeline.
-            The shape order should be (height, width). Defaults to (640, 640).
-        ratio_range (Sequence[float]): Scale ratio of mixup image.
-            Defaults to (0.5, 1.5).
-        flip_ratio (float): Horizontal flip ratio of mixup image.
-            Defaults to 0.5.
-        pad_val (float): Pad value. Defaults to 114.0.
-        max_iters (int): The maximum number of iterations. If the number of
-            iterations is greater than `max_iters`, but gt_bbox is still
-            empty, then the iteration is terminated. Defaults to 15.
-        bbox_clip_border (bool, optional): Whether to clip the objects outside
-            the border of the image. In some dataset like MOT17, the gt bboxes
-            are allowed to cross the border of images. Therefore, we don't
-            need to clip the gt bboxes in these cases. Defaults to True.
-        max_cached_images (int): The maximum length of the cache. The larger
-            the cache, the stronger the randomness of this transform. As a
-            rule of thumb, providing 10 caches for each image suffices for
-            randomness. Defaults to 20.
-        random_pop (bool): Whether to randomly pop a result from the cache
-            when the cache is full. If set to False, use FIFO popping method.
-            Defaults to True.
-        probability (float): Probability of applying this transformation.
-            Defaults to 1.0.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-    """
-
-    def __init__(
-        self,
-        img_scale: tuple[int, int] | list[int] = (640, 640),  # (H, W)
-        ratio_range: tuple[float, float] = (0.5, 1.5),
-        flip_ratio: float = 0.5,
-        pad_val: float = 114.0,
-        max_iters: int = 15,
-        bbox_clip_border: bool = True,
-        max_cached_images: int = 20,
-        random_pop: bool = True,
-        probability: float = 1.0,
-        is_numpy_to_tvtensor: bool = True,
-    ) -> None:
-        super().__init__()
-
-        assert isinstance(img_scale, (tuple, list))  # noqa: S101
-        assert max_cached_images >= 2, f"The length of cache must >= 2, but got {max_cached_images}."  # noqa: S101
-        assert 0 <= probability <= 1.0, f"The probability should be in range [0,1]. got {probability}."  # noqa: S101
-        self.dynamic_scale = img_scale  # (H, W)
-        self.ratio_range = ratio_range
-        self.flip_ratio = flip_ratio
-        self.pad_val = pad_val
-        self.max_iters = max_iters
-        self.bbox_clip_border = bbox_clip_border
-        self.results_cache: list[OTXSample] = []
-
-        self.max_cached_images = max_cached_images
-        self.random_pop = random_pop
-        self.prob = probability
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-
-    @cache_randomness
-    def get_indexes(self, cache: list) -> int:
-        """Call function to collect indexes.
-
-        Args:
-            cache (list): The result cache.
-
-        Returns:
-            int: index.
-        """
-        for _ in range(self.max_iters):
-            index = RNG.integers(0, len(cache) - 1)
-            gt_bboxes_i = cache[index].bboxes
-            if len(gt_bboxes_i) != 0:
-                break
-        return index
-
-    @typing.no_type_check  # TODO(ashwinvaidya17): temporary
-    def forward(self, *_inputs: OTXSample) -> OTXSample | None:
-        """MixUp transform function."""
-        # cache and pop images
-        assert len(_inputs) == 1, "[tmp] Multiple entity is not supported yet."  # noqa: S101
-        inputs = _inputs[0]
-
-        self.results_cache.append(copy.deepcopy(inputs))
-        if len(self.results_cache) > self.max_cached_images:
-            index = RNG.integers(0, len(self.results_cache) - 1) if self.random_pop else 0
-            self.results_cache.pop(index)
-
-        if len(self.results_cache) <= 1:
-            return self.convert(inputs)
-
-        if RNG.uniform(0, 1) > self.prob:
-            return self.convert(inputs)
-
-        index = self.get_indexes(self.results_cache)
-        retrieve_results = copy.deepcopy(self.results_cache[index])
-
-        # TODO (mmdetection): refactor mixup to reuse these code.
-        # https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/datasets/transforms/transforms.py#L3721
-        if retrieve_results.bboxes.shape[0] == 0:
-            # empty bbox
-            return self.convert(inputs)
-
-        retrieve_img: np.ndarray = to_np_image(retrieve_results.image)
-        with_mask = bool(hasattr(inputs, "masks"))
-
-        jit_factor = RNG.uniform(*self.ratio_range)
-        is_flip = RNG.uniform(0, 1) > self.flip_ratio
-
-        if len(retrieve_img.shape) == 3:
-            out_img = (
-                np.ones((self.dynamic_scale[0], self.dynamic_scale[1], 3), dtype=retrieve_img.dtype) * self.pad_val
-            )
-        else:
-            out_img = np.ones(self.dynamic_scale, dtype=retrieve_img.dtype) * self.pad_val
-
-        # 1. keep_ratio resize
-        scale_ratio = min(self.dynamic_scale[0] / retrieve_img.shape[0], self.dynamic_scale[1] / retrieve_img.shape[1])
-        retrieve_img = cv2.resize(
-            retrieve_img,
-            (int(retrieve_img.shape[1] * scale_ratio), int(retrieve_img.shape[0] * scale_ratio)),
-            interpolation=cv2.INTER_LINEAR,
-        )
-
-        # 2. paste
-        out_img[: retrieve_img.shape[0], : retrieve_img.shape[1]] = retrieve_img
-
-        # 3. scale jit
-        scale_ratio *= jit_factor
-        out_img = cv2.resize(
-            out_img,
-            (int(out_img.shape[1] * jit_factor), int(out_img.shape[0] * jit_factor)),
-            interpolation=cv2.INTER_LINEAR,
-        )
-
-        # 4. flip
-        if is_flip:
-            out_img = out_img[:, ::-1, :]
-
-        # 5. random crop
-        ori_img: np.ndarray = to_np_image(inputs.image)
-        origin_h, origin_w = out_img.shape[:2]
-        target_h, target_w = ori_img.shape[:2]
-        padded_img = np.ones((max(origin_h, target_h), max(origin_w, target_w), 3)) * self.pad_val
-        padded_img[:origin_h, :origin_w] = out_img
-
-        x_offset, y_offset = 0, 0
-        if padded_img.shape[0] > target_h:
-            y_offset = RNG.integers(0, padded_img.shape[0] - target_h)
-        if padded_img.shape[1] > target_w:
-            x_offset = RNG.integers(0, padded_img.shape[1] - target_w)
-        padded_cropped_img = padded_img[y_offset : y_offset + target_h, x_offset : x_offset + target_w]
-
-        # 6. adjust bbox
-        retrieve_gt_bboxes = retrieve_results.bboxes
-        retrieve_gt_bboxes = rescale_bboxes(retrieve_gt_bboxes, (scale_ratio, scale_ratio))
-
-        if self.bbox_clip_border:
-            retrieve_gt_bboxes = clip_bboxes(retrieve_gt_bboxes, (origin_h, origin_w))
-
-        if is_flip:
-            retrieve_gt_bboxes = flip_bboxes(retrieve_gt_bboxes, (origin_h, origin_w), direction="horizontal")
-
-        # 7. filter
-        cp_retrieve_gt_bboxes = retrieve_gt_bboxes.clone()
-        cp_retrieve_gt_bboxes = translate_bboxes(cp_retrieve_gt_bboxes, (-x_offset, -y_offset))
-
-        if self.bbox_clip_border:
-            cp_retrieve_gt_bboxes = clip_bboxes(cp_retrieve_gt_bboxes, (target_h, target_w))
-
-        # 8. mix up
-        ori_img = ori_img.astype(np.float32)
-        mixup_img = 0.5 * ori_img + 0.5 * padded_cropped_img.astype(np.float32)
-
-        retrieve_gt_bboxes_labels = retrieve_results.label
-
-        mixup_gt_bboxes = torch.cat((inputs.bboxes, cp_retrieve_gt_bboxes), dim=0)
-        mixup_gt_bboxes_labels = torch.cat((inputs.label, retrieve_gt_bboxes_labels), dim=0)
-
-        # remove outside bbox
-        inside_inds = is_inside_bboxes(mixup_gt_bboxes, (target_h, target_w))
-        mixup_gt_bboxes = mixup_gt_bboxes[inside_inds]
-        mixup_gt_bboxes_labels = mixup_gt_bboxes_labels[inside_inds]
-
-        inputs.image = mixup_img.astype(np.uint8)
-        inputs.img_info = _resized_crop_image_info(
-            inputs.img_info,
-            mixup_img.shape[:2],
-        )  # TODO (sungchul): need to add proper function
-        inputs.bboxes = tv_tensors.BoundingBoxes(mixup_gt_bboxes, format="XYXY", canvas_size=mixup_img.shape[:2])
-        inputs.label = mixup_gt_bboxes_labels
-        if with_mask:
-            inside_inds = inside_inds.numpy()
-            if (masks := getattr(retrieve_results, "masks", None)) is not None and len(masks) > 0:
-                masks = masks.numpy() if not isinstance(masks, np.ndarray) else masks
-
-                # 6. adjust bbox
-                retrieve_gt_masks = rescale_masks(masks, scale_ratio)
-                if is_flip:
-                    retrieve_gt_masks = flip_masks(retrieve_gt_masks)
-
-                # 7. filter
-                retrieve_gt_masks = translate_masks(
-                    retrieve_gt_masks,
-                    out_shape=(target_h, target_w),
-                    offset=-x_offset,
-                    direction="horizontal",
-                )
-                retrieve_gt_masks = translate_masks(
-                    retrieve_gt_masks,
-                    out_shape=(target_h, target_w),
-                    offset=-y_offset,
-                    direction="vertical",
-                )
-
-                # 8. mix up
-                inputs_masks = inputs.masks.numpy() if not isinstance(inputs.masks, np.ndarray) else inputs.masks
-                mixup_gt_masks = np.concatenate([inputs_masks, retrieve_gt_masks])
-
-                inputs.masks = mixup_gt_masks[inside_inds]
-
-        return self.convert(inputs)
-
-    def __repr__(self):
-        repr_str = self.__class__.__name__
-        repr_str += f"(dynamic_scale={self.dynamic_scale}, "
-        repr_str += f"ratio_range={self.ratio_range}, "
-        repr_str += f"flip_ratio={self.flip_ratio}, "
-        repr_str += f"pad_val={self.pad_val}, "
-        repr_str += f"max_iters={self.max_iters}, "
-        repr_str += f"bbox_clip_border={self.bbox_clip_border}, "
-        repr_str += f"max_cached_images={self.max_cached_images}, "
-        repr_str += f"random_pop={self.random_pop}, "
-        repr_str += f"prob={self.prob}, "
-        repr_str += f"is_numpy_to_tvtensor={self.is_numpy_to_tvtensor})"
-        return repr_str
-
-
-class YOLOXHSVRandomAug(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Implementation of mmdet.datasets.transforms.YOLOXHSVRandomAug with torchvision format.
-
-    Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/datasets/transforms/transforms.py#L2905-L2961
-
-    TODO : optimize logic to torcivision pipeline
-
-    Args:
-        hue_delta (int): delta of hue. Defaults to 5.
-        saturation_delta (int): delta of saturation. Defaults to 30.
-        value_delta (int): delat of value. Defaults to 30.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-    """
-
-    def __init__(
-        self,
-        hue_delta: int = 5,
-        saturation_delta: int = 30,
-        value_delta: int = 30,
-        is_numpy_to_tvtensor: bool = True,
-    ) -> None:
-        super().__init__()
-
-        self.hue_delta = hue_delta
-        self.saturation_delta = saturation_delta
-        self.value_delta = value_delta
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-
-    @cache_randomness
-    def _get_hsv_gains(self) -> np.ndarray:
-        hsv_gains = RNG.uniform(-1, 1, 3) * [
-            self.hue_delta,
-            self.saturation_delta,
-            self.value_delta,
-        ]
-        # random selection of h, s, v
-        hsv_gains *= RNG.integers(0, 2, 3)
-        # prevent overflow
-        return hsv_gains.astype(np.int16)
-
-    def forward(self, *_inputs: OTXSample) -> OTXSample | None:
-        """Forward for random hsv transform."""
-        assert len(_inputs) == 1, "[tmp] Multiple entity is not supported yet."  # noqa: S101
-        inputs = _inputs[0]
-
-        img: np.ndarray = to_np_image(inputs.image)
-        hsv_gains = self._get_hsv_gains()
-        # Convert RGB to HSV for color augmentation
-        img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV).astype(np.int16)
-
-        img_hsv[..., 0] = (img_hsv[..., 0] + hsv_gains[0]) % 180
-        img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_gains[1], 0, 255)
-        img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_gains[2], 0, 255)
-        img = cv2.cvtColor(img_hsv.astype(img.dtype), cv2.COLOR_HSV2RGB)
-        inputs.image = img
-        return self.convert(inputs)
-
-    def __repr__(self):
-        repr_str = self.__class__.__name__
-        repr_str += f"(hue_delta={self.hue_delta}, "
-        repr_str += f"saturation_delta={self.saturation_delta}, "
-        repr_str += f"value_delta={self.value_delta}, "
-        repr_str += f"is_numpy_to_tvtensor={self.is_numpy_to_tvtensor})"
-        return repr_str
-
-
-class Pad(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Implementation of mmdet.datasets.transforms.Pad with torchvision format.
-
-    Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/datasets/transforms/transforms.py#L705-L784
-
-    TODO : optimize logic to torcivision pipeline
-
-    Args:
-        size (tuple, optional): Fixed padding size.
-            Expected padding shape (height, width). Defaults to None.
-        size_divisor (int, optional): The divisor of padded size. Defaults to
-            None.
-        pad_to_square (bool): Whether to pad the image into a square.
-            Currently only used for YOLOX. Defaults to False.
-        pad_val (int | float | dict[str, int | float], optional) - Padding value for if
-            the pad_mode is "constant".  If it is a single number, the value
-            to pad the image is the number and to pad the semantic
-            segmentation map is 255. If it is a dict, it should have the
-            following keys:
-
-            - img: The value to pad the image.
-            - seg: The value to pad the semantic segmentation map.
-            Defaults to dict(img=0, seg=255).
-        padding_mode (str): Type of padding. Should be: constant, edge,
-            reflect or symmetric. Defaults to 'constant'.
-
-            - constant: pads with a constant value, this value is specified
-              with pad_val.
-            - edge: pads with the last value at the edge of the image.
-            - reflect: pads with reflection of image without repeating the last
-              value on the edge. For example, padding [1, 2, 3, 4] with 2
-              elements on both sides in reflect mode will result in
-              [3, 2, 1, 2, 3, 4, 3, 2].
-            - symmetric: pads with reflection of image repeating the last value
-              on the edge. For example, padding [1, 2, 3, 4] with 2 elements on
-              both sides in symmetric mode will result in
-              [2, 1, 1, 2, 3, 4, 4, 3]
-        transform_mask (bool): Whether to transform masks. Defaults to False.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-    """
-
-    border_type: ClassVar = {
-        "constant": cv2.BORDER_CONSTANT,
-        "edge": cv2.BORDER_REPLICATE,
-        "reflect": cv2.BORDER_REFLECT_101,
-        "symmetric": cv2.BORDER_REFLECT,
-    }
-
-    def __init__(
-        self,
-        size: tuple[int, int] | None = None,  # (H, W)
-        size_divisor: int | None = None,
-        pad_to_square: bool = False,
-        pad_val: int | float | dict | None = None,
-        padding_mode: str = "constant",
-        transform_point: bool = False,
-        transform_mask: bool = False,
-        is_numpy_to_tvtensor: bool = True,
-    ) -> None:
-        super().__init__()
-
-        self.size = size
-        self.size_divisor = size_divisor
-        pad_val = pad_val or {"img": 0, "mask": 0}
-        if isinstance(pad_val, int):
-            pad_val = {"img": pad_val, "mask": 0}
-        assert isinstance(pad_val, dict), "pad_val "  # noqa: S101
-        self.pad_val = pad_val
-        self.pad_to_square = pad_to_square
-
-        if pad_to_square:
-            assert size is None, "The size and size_divisor must be None when pad2square is True"  # noqa: S101
-        else:
-            assert size is not None or size_divisor is not None, "only one of size and size_divisor should be valid"  # noqa: S101
-            assert size is None or size_divisor is None  # noqa: S101
-        assert padding_mode in ["constant", "edge", "reflect", "symmetric"]  # noqa: S101
-        self.padding_mode = padding_mode
-        self.transform_point = transform_point
-        self.transform_mask = transform_mask
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-
-    def _pad_img(self, inputs: OTXSample) -> OTXSample:
-        """Pad images according to ``self.size``."""
-        img: np.ndarray = to_np_image(inputs.image)
-        pad_val = self.pad_val.get("img", 0)
-
-        size: tuple[int, int]
-        if self.pad_to_square:
-            max_size = max(img.shape[:2])
-            size = (max_size, max_size)
-
-        if self.size_divisor is not None:
-            if not self.pad_to_square:
-                size = (img.shape[0], img.shape[1])
-            pad_h = int(np.ceil(size[0] / self.size_divisor)) * self.size_divisor
-            pad_w = int(np.ceil(size[1] / self.size_divisor)) * self.size_divisor
-            size = (pad_h, pad_w)
-        elif self.size is not None:
-            size = self.size  # (H, W)
-
-        if isinstance(pad_val, int) and img.ndim == 3:
-            pad_val = tuple(pad_val for _ in range(img.shape[2]))
-
-        width = max(size[1] - img.shape[1], 0)
-        height = max(size[0] - img.shape[0], 0)
-        padding = [0, 0, width, height]
-
-        padded_img = cv2.copyMakeBorder(
-            img,
-            padding[1],
-            padding[3],
-            padding[0],
-            padding[2],
-            self.border_type[self.padding_mode],
-            value=pad_val,
-        )
-        inputs.image = padded_img
-        inputs.img_info = _pad_image_info(inputs.img_info, padding)
-        return inputs
-
-    @typing.no_type_check  # TODO(ashwinvaidya17): temporary
-    def _pad_masks(self, inputs: OTXSample) -> OTXSample:
-        """Pad masks according to inputs.image_info.padding."""
-        if (masks := getattr(inputs, "masks", None)) is not None and len(masks) > 0:
-            masks = masks.numpy() if not isinstance(masks, np.ndarray) else masks
-
-            pad_val = self.pad_val.get("mask", 0)
-            padding = inputs.img_info.padding
-
-            padded_masks = []
-            for mask in masks:
-                orig_dtype = mask.dtype
-                # cv2.copyMakeBorder does not support bool, so cast to uint8 if needed
-                if mask.dtype == np.bool_:
-                    mask_to_pad = mask.astype(np.uint8)
-                    pad_val_cast = int(bool(pad_val))
-                else:
-                    mask_to_pad = mask
-                    pad_val_cast = pad_val
-
-                padded = cv2.copyMakeBorder(
-                    mask_to_pad,
-                    padding[1],
-                    padding[3],
-                    padding[0],
-                    padding[2],
-                    self.border_type[self.padding_mode],
-                    value=pad_val_cast,
-                )
-                # Cast back to original dtype if needed
-                if orig_dtype == np.bool_:
-                    padded = padded.astype(np.bool_)
-                padded_masks.append(padded)
-
-            inputs.masks = np.stack(padded_masks)
-
-        return inputs
-
-    def forward(self, *_inputs: OTXSample) -> OTXSample | None:
-        """Forward function to pad images."""
-        assert len(_inputs) == 1, "[tmp] Multiple entity is not supported yet."  # noqa: S101
-        inputs = _inputs[0]
-
-        outputs = self._pad_img(inputs)
-
-        if self.transform_mask:
-            outputs = self._pad_masks(outputs)
-
-        return self.convert(outputs)
-
-
-class RandomResize(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Implementation of mmcv.transforms.RandomResize with torchvision format.
-
-    Reference : https://github.com/open-mmlab/mmcv/blob/v2.1.0/mmcv/transforms/processing.py#L1381-L1562
-
-    Args:
-        scale (Sequence): Images scales for resizing with (height, width). Defaults to None.
-        ratio_range (tuple[float], optional): (min_ratio, max_ratio). Defaults to None.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-        **resize_kwargs: Other keyword arguments for the ``resize_type``.
-    """
-
-    def __init__(
-        self,
-        scale: Sequence[int | tuple[int, int]],  # (H, W)
-        ratio_range: tuple[float, float] | None = None,
-        is_numpy_to_tvtensor: bool = True,
-        **resize_kwargs,
-    ) -> None:
-        super().__init__()
-        if isinstance(scale, list):
-            scale = tuple(scale)
-        self.scale = scale
-        self.ratio_range = ratio_range
-        self.resize_kwargs = resize_kwargs
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-        self.resize = Resize(scale=0, **resize_kwargs, is_numpy_to_tvtensor=is_numpy_to_tvtensor)
-
-    @staticmethod
-    def _random_sample(scales: Sequence[tuple[int, int]]) -> tuple:
-        """Private function to randomly sample a scale from a Sequence of tuples.
-
-        Args:
-            scales (Sequence[tuple]): Images scale range for sampling.
-                There must be two tuples in scales, which specify the lower
-                and upper bound of image scales.
-
-        Returns:
-            (tuple): The targeted scale of the image to be resized.
-        """
-        assert isinstance(scales, Sequence)  # noqa: S101
-        assert all(isinstance(scale, tuple) for scale in scales)  # noqa: S101
-        assert len(scales) == 2  # noqa: S101
-        scale_0 = [scales[0][0], scales[1][0]]
-        scale_1 = [scales[0][1], scales[1][1]]
-        edge_0 = RNG.integers(min(scale_0), max(scale_0) + 1)
-        edge_1 = RNG.integers(min(scale_1), max(scale_1) + 1)
-        return (edge_0, edge_1)
-
-    @staticmethod
-    def _random_sample_ratio(scale: tuple, ratio_range: tuple[float, float]) -> tuple:
-        """Private function to randomly sample a scale from a tuple.
-
-        A ratio will be randomly sampled from the range specified by
-        ``ratio_range``. Then it would be multiplied with ``scale`` to
-        generate sampled scale.
-
-        Args:
-            scale (tuple): Images scale base to multiply with ratio.
-            ratio_range (tuple[float]): The minimum and maximum ratio to scale
-                the ``scale``.
-
-        Returns:
-            (tuple): The targeted scale of the image to be resized.
-        """
-        assert isinstance(scale, tuple)  # noqa: S101
-        assert len(scale) == 2  # noqa: S101
-        min_ratio, max_ratio = ratio_range
-        assert min_ratio <= max_ratio  # noqa: S101
-        ratio = RNG.random() * (max_ratio - min_ratio) + min_ratio
-        return int(scale[0] * ratio), int(scale[1] * ratio)
-
-    @cache_randomness
-    def _random_scale(self) -> tuple:
-        """Private function to randomly sample an scale according to the type of ``scale``.
-
-        Returns:
-            (tuple): The targeted scale of the image to be resized.
-        """
-        if isinstance(self.scale, tuple) and all(isinstance(s, int) for s in self.scale):
-            assert self.ratio_range is not None  # noqa: S101
-            assert len(self.ratio_range) == 2  # noqa: S101
-            scale = self._random_sample_ratio(self.scale, self.ratio_range)
-        elif all(isinstance(s, tuple) for s in self.scale):
-            scale = self._random_sample(self.scale)  # type: ignore[arg-type]
-        else:
-            msg = f'Do not support sampling function for "{self.scale}"'
-            raise NotImplementedError(msg)
-
-        return scale
-
-    def forward(self, *_inputs: OTXSample) -> OTXSample | None:
-        """Transform function to resize images, bounding boxes, semantic segmentation map."""
-        self.resize.scale = self._random_scale()
-        outputs = self.resize(*_inputs)
-        return self.convert(outputs)
-
-    def __repr__(self) -> str:
-        # TODO (sungchul): update other's repr
-        repr_str = self.__class__.__name__
-        repr_str += f"(scale={self.scale}, "
-        repr_str += f"ratio_range={self.ratio_range}, "
-        repr_str += f"is_numpy_to_tvtensor={self.is_numpy_to_tvtensor}, "
-        repr_str += f"resize_kwargs={self.resize_kwargs})"
-        return repr_str
-
-
-class RandomCrop(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Implementation of mmdet.datasets.transforms.RandomCrop with torchvision format.
-
-    Reference : https://github.com/open-mmlab/mmcv/blob/v2.1.0/mmcv/transforms/processing.py#L1381-L1562
-
-    The absolute `crop_size` is sampled based on `crop_type` and `image_size`, then the cropped results are generated.
-
-    Args:
-        crop_size (tuple[int, int]): The relative ratio or absolute pixels of
-            (height, width).
-        crop_type (str, optional): One of "relative_range", "relative",
-            "absolute", "absolute_range". "relative" randomly crops
-            (h * crop_size[0], w * crop_size[1]) part from an input of size
-            (h, w). "relative_range" uniformly samples relative crop size from
-            range [crop_size[0], 1] and [crop_size[1], 1] for height and width
-            respectively. "absolute" crops from an input with absolute size
-            (crop_size[0], crop_size[1]). "absolute_range" uniformly samples
-            crop_h in range [crop_size[0], min(h, crop_size[1])] and crop_w
-            in range [crop_size[0], min(w, crop_size[1])].
-            Defaults to "absolute".
-        cat_max_ratio (float): The maximum ratio that single category could occupy.
-        allow_negative_crop (bool, optional): Whether to allow a crop that does
-            not contain any bbox area. Defaults to False.
-        recompute_bbox (bool, optional): Whether to re-compute the boxes based
-            on cropped instance masks. Defaults to False.
-        bbox_clip_border (bool, optional): Whether clip the objects outside
-            the border of the image. Defaults to True.
-        ignore_index (int): The label index to be ignored. Defaults to 255.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-    """
-
-    def __init__(
-        self,
-        crop_size: tuple[int, int],  # (H, W)
-        crop_type: str = "absolute",
-        cat_max_ratio: int | float = 1,
-        allow_negative_crop: bool = False,
-        recompute_bbox: bool = False,
-        bbox_clip_border: bool = True,
-        ignore_index: int = 255,
-        is_numpy_to_tvtensor: bool = True,
-    ) -> None:
-        super().__init__()
-        if crop_type not in ["relative_range", "relative", "absolute", "absolute_range"]:
-            msg = f"Invalid crop_type {crop_type}."
-            raise ValueError(msg)
-        if crop_type in ["absolute", "absolute_range"]:
-            assert crop_size[0] > 0  # noqa: S101
-            assert crop_size[1] > 0  # noqa: S101
-            assert isinstance(crop_size[0], int)  # noqa: S101
-            assert isinstance(crop_size[1], int)  # noqa: S101
-            if crop_type == "absolute_range":
-                assert crop_size[0] <= crop_size[1]  # noqa: S101
-        else:
-            assert 0 < crop_size[0] <= 1  # noqa: S101
-            assert 0 < crop_size[1] <= 1  # noqa: S101
-        self.crop_size = crop_size  # (H, W)
-        self.crop_type = crop_type
-        self.cat_max_ratio = cat_max_ratio
-        self.allow_negative_crop = allow_negative_crop
-        self.bbox_clip_border = bbox_clip_border
-        self.recompute_bbox = recompute_bbox
-        self.ignore_index = ignore_index
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-
-    def _generate_crop_bbox(
-        self,
-        orig_shape: tuple[int, int],
-        crop_size: tuple[int, int],
-    ) -> tuple:
-        """Randomly get a crop bounding box.
-
-        Args:
-            orig_shape (tuple): The original shape of the image.
-            crop_size (tuple): The size of the crop.
-
-        Returns:
-            tuple: Coordinates of the cropped image.
-        """
-        margin_h = max(orig_shape[0] - crop_size[0], 0)
-        margin_w = max(orig_shape[1] - crop_size[1], 0)
-        offset_h, offset_w = self._rand_offset((margin_h, margin_w))
-        crop_y1, crop_y2 = offset_h, offset_h + crop_size[0]
-        crop_x1, crop_x2 = offset_w, offset_w + crop_size[1]
-
-        return (crop_x1, crop_y1, crop_x2, crop_y2), offset_h, offset_w
-
-    @typing.no_type_check  # TODO(ashwinvaidya17): temporary
-    def _crop_data(
-        self,
-        inputs: OTXSample,
-        crop_size: tuple[int, int],
-        allow_negative_crop: bool,
-    ) -> OTXSample | None:
-        """Function to randomly crop images, bounding boxes, masks, semantic segmentation maps."""
-        assert crop_size[0] > 0  # noqa: S101
-        assert crop_size[1] > 0  # noqa: S101
-
-        img: np.ndarray = to_np_image(inputs.image)
-        orig_shape = inputs.img_info.img_shape
-        crop_bbox, offset_h, offset_w = self._generate_crop_bbox(orig_shape, crop_size)
-
-        # for semantic segmentation
-        # reference : https://github.com/open-mmlab/mmsegmentation/blob/v1.2.1/mmseg/datasets/transforms/transforms.py#L281-L290
-        if (self.cat_max_ratio < 1.0) and ((masks := getattr(inputs, "masks", None)) is not None and len(masks) > 0):
-            # Repeat 10 times
-            for _ in range(10):
-                seg_temp = crop_masks(masks, np.array(crop_bbox))
-                labels, cnt = np.unique(seg_temp, return_counts=True)
-                cnt = cnt[labels != self.ignore_index]
-                if len(cnt) > 1 and np.max(cnt) / np.sum(cnt) < self.cat_max_ratio:
-                    break
-                crop_bbox, offset_h, offset_w = self._generate_crop_bbox(orig_shape, crop_size)
-
-        # crop the image
-        crop_x1, crop_y1, crop_x2, crop_y2 = crop_bbox
-        img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...]
-        cropped_img_shape = img.shape[:2]
-
-        inputs.image = img
-        inputs.img_info = _crop_image_info(inputs.img_info, *cropped_img_shape)
-
-        valid_inds: np.ndarray = np.array([1])  # for semantic segmentation
-        # crop bboxes accordingly and clip to the image boundary
-        if (bboxes := getattr(inputs, "bboxes", None)) is not None:
-            bboxes = translate_bboxes(bboxes, [-offset_w, -offset_h])
-            if self.bbox_clip_border:
-                bboxes = clip_bboxes(bboxes, cropped_img_shape)
-
-            valid_inds = is_inside_bboxes(bboxes, cropped_img_shape).numpy()
-            # If the crop does not contain any gt-bbox area and
-            # allow_negative_crop is False, skip this image.
-            if not valid_inds.any() and not allow_negative_crop:
-                return None
-
-            inputs.bboxes = tv_tensors.BoundingBoxes(bboxes[valid_inds], format="XYXY", canvas_size=cropped_img_shape)
-
-            if inputs.label is not None:
-                inputs.label = inputs.label[valid_inds]
-
-        if (masks := getattr(inputs, "masks", None)) is not None and len(masks) > 0:
-            masks = masks.numpy() if not isinstance(masks, np.ndarray) else masks
-            inputs.masks = crop_masks(
-                masks[valid_inds.nonzero()[0]],
-                np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]),
-            )
-
-            if self.recompute_bbox:
-                inputs.bboxes = tv_tensors.wrap(
-                    torch.as_tensor(get_bboxes_from_masks(inputs.masks)),
-                    like=inputs.bboxes,
-                )
-
-        return inputs
-
-    @cache_randomness
-    def _rand_offset(self, margin: tuple[int, int]) -> tuple[int, int]:
-        """Randomly generate crop offset.
-
-        Args:
-            margin (tuple[int, int]): The upper bound for the offset generated
-                randomly.
-
-        Returns:
-            tuple[int, int]: The random offset for the crop.
-        """
-        margin_h, margin_w = margin
-        offset_h = RNG.integers(0, margin_h + 1)
-        offset_w = RNG.integers(0, margin_w + 1)
-
-        return offset_h, offset_w
-
-    @cache_randomness
-    def _get_crop_size(self, image_size: tuple[int, int]) -> tuple[int, int]:
-        """Randomly generates the absolute crop size based on `crop_type` and `image_size`.
-
-        Args:
-            image_size (tuple[int, int]): (h, w).
-
-        Returns:
-            crop_size (tuple[int, int]): (crop_h, crop_w) in absolute pixels.
-        """
-        h, w = image_size
-        if self.crop_type == "absolute":
-            return min(self.crop_size[0], h), min(self.crop_size[1], w)
-
-        if self.crop_type == "absolute_range":
-            # `self.crop_size` is used as range, not absolute value
-            crop_h = RNG.integers(min(h, self.crop_size[0]), min(h, self.crop_size[1]) + 1)
-            crop_w = RNG.integers(min(w, self.crop_size[0]), min(w, self.crop_size[1]) + 1)
-            return crop_h, crop_w
-
-        if self.crop_type == "relative":
-            crop_h, crop_w = self.crop_size
-            return int(h * crop_h + 0.5), int(w * crop_w + 0.5)
-
-        # 'relative_range'
-        crop_size = np.asarray(self.crop_size, dtype=np.float32)
-        crop_h, crop_w = crop_size + RNG.random(2) * (1 - crop_size)
-        return int(h * crop_h + 0.5), int(w * crop_w + 0.5)
-
-    @typing.no_type_check  # TODO(ashwinvaidya17): temporary
-    def forward(self, *_inputs: OTXSample) -> OTXSample | None:
-        """Transform function to randomly crop images, bounding boxes, and masks."""
-        assert len(_inputs) == 1, "[tmp] Multiple entity is not supported yet."  # noqa: S101
-        inputs = _inputs[0]
-
-        crop_size = self._get_crop_size(inputs.img_info.img_shape)
-
-        outputs = self._crop_data(inputs, crop_size, self.allow_negative_crop)
-        return self.convert(outputs)
-
-    def __repr__(self) -> str:
-        repr_str = self.__class__.__name__
-        repr_str += f"(crop_size={self.crop_size}, "
-        repr_str += f"crop_type={self.crop_type}, "
-        repr_str += f"allow_negative_crop={self.allow_negative_crop}, "
-        repr_str += f"recompute_bbox={self.recompute_bbox}, "
-        repr_str += f"bbox_clip_border={self.bbox_clip_border}, "
-        repr_str += f"is_numpy_to_tvtensor={self.is_numpy_to_tvtensor})"
-        return repr_str
-
-
-class Compose(tvt_v2.Compose):
-    """Re-implementation of torchvision.transforms.v2.Compose.
-
-    MMCV transforms can produce None, so it is required to skip the result.
-
-    This class also handles native torchvision transforms by extracting only the
-    transformable fields (image, masks, bboxes) and applying transforms to them,
-    avoiding transforms being applied to non-image tensors like labels.
-    """
-
-    def _is_native_torchvision_transform(self, transform: tvt_v2.Transform) -> bool:
-        """Check if the transform is a native torchvision transform."""
-        module = type(transform).__module__
-        return module.startswith("torchvision.")
-
-    def _apply_native_transform(self, transform: tvt_v2.Transform, inputs: OTXSample) -> OTXSample:
-        """Apply native torchvision transform only to image-related fields.
-
-        TorchVision v2 expects standard field names like `boxes`/`labels`; we
-        map to those before calling the transform and map back afterward.
-        We also keep `img_info` in sync when the image size changes.
-        """
-        # Build a dict of transformable fields with torchvision-friendly keys
-        transformable: dict[str, Any] = {}
-        if (image := getattr(inputs, "image", None)) is not None:
-            transformable["image"] = image
-        if (masks := getattr(inputs, "masks", None)) is not None:
-            transformable["masks"] = masks
-        if (bboxes := getattr(inputs, "bboxes", None)) is not None:
-            transformable["boxes"] = bboxes
-        if (label := getattr(inputs, "label", None)) is not None:
-            transformable["labels"] = label
-        if (img_info := getattr(inputs, "img_info", None)) is not None:
-            transformable["img_info"] = img_info
-
-        if not transformable:
-            return inputs
-
-        # Apply transform to transformable fields
-        # If there's only an image, pass it directly; otherwise pass as dict
-        if len(transformable) == 1 and "image" in transformable:
-            result = transform(transformable["image"])
-            inputs.image = result
-        else:
-            result = transform(transformable)
-            if isinstance(result, dict):
-                for key, value in result.items():
-                    if key == "boxes":
-                        inputs.bboxes = value  # type: ignore[misc]
-                    elif key == "labels":
-                        inputs.label = value  # type: ignore[misc]
-                    else:
-                        setattr(inputs, key, value)
-            else:
-                # Single result, assume it's the image
-                inputs.image = result
-        return inputs
-
-    def forward(self, *inputs: OTXSample) -> OTXSample | None:
-        """Forward with skipping None."""
-        needs_unpacking = len(inputs) > 1
-        for transform in self.transforms:
-            if self._is_native_torchvision_transform(transform):
-                # Apply native transforms only to image-related fields
-                outputs = self._apply_native_transform(transform, inputs[0])
-            else:
-                outputs = transform(*inputs)
-            # MMCV transform can produce None. Please see
-            # https://github.com/open-mmlab/mmengine/blob/26f22ed283ae4ac3a24b756809e5961efe6f9da8/mmengine/dataset/base_dataset.py#L59-L66
-            if outputs is None:
-                return outputs
-            inputs = outputs if needs_unpacking else (outputs,)  # type: ignore[assignment]
-        return outputs
-
-
-class TopdownAffine(tvt_v2.Transform, NumpytoTVTensorMixin):
-    """Get the bbox image as the model input by affine transform.
-
-    Args:
-        input_size (tuple[int, int]): The size of the model input.
-        probability (float): The probability of applying affine
-            transforms. Defaults to 0.5.
-        is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False.
-        shift_factor (float): The factor of shift. Defaults to 0.16.
-        shift_prob (float): The probability of shift. Defaults to 0.3.
-        scale_factor (tuple[float, float]): The factor of scale. Defaults to (0.5, 1.5).
-        scale_prob (float): The probability of scale. Defaults to 1.0.
-        rotate_factor (float): The factor of rotate. Defaults to 80.0.
-        rotate_prob (float): The probability of rotate. Defaults to 0.5.
-        interpolation (str): The interpolation method. Defaults to "bilinear".
-    """
-
-    def __init__(
-        self,
-        input_size: tuple[int, int],
-        probability: float = 1.0,
-        is_numpy_to_tvtensor: bool = True,
-        shift_factor: float = 0.16,
-        shift_prob: float = 0.3,
-        scale_factor: tuple[float, float] = (0.5, 1.5),
-        scale_prob: float = 1.0,
-        rotate_factor: float = 80.0,
-        rotate_prob: float = 0.5,
-        interpolation: str = "bilinear",
-    ) -> None:
-        super().__init__()
-
-        self.input_size = input_size
-        self.is_numpy_to_tvtensor = is_numpy_to_tvtensor
-        self.affine_transforms_prob = probability
-        self.shift_factor = shift_factor
-        self.shift_prob = shift_prob
-        self.scale_factor = scale_factor
-        self.scale_prob = scale_prob
-        self.rotate_factor = rotate_factor
-        self.rotate_prob = rotate_prob
-        self.interpolation = interpolation
-
-    @staticmethod
-    def _fix_aspect_ratio(bbox_scale: np.ndarray, aspect_ratio: float) -> np.ndarray:
-        """Reshape the bbox to a fixed aspect ratio.
-
-        Args:
-            bbox_scale (np.ndarray): The bbox scales (w, h) in shape (n, 2)
-            aspect_ratio (float): The ratio of ``w/h``
-
-        Returns:
-            np.darray: The reshaped bbox scales in (n, 2)
-        """
-        w, h = np.hsplit(bbox_scale, [1])
-        return np.where(w > h * aspect_ratio, np.hstack([w, w / aspect_ratio]), np.hstack([h * aspect_ratio, h]))
-
-    @staticmethod
-    def _truncnorm(low: float = -1.0, high: float = 1.0, size: int = 4) -> torch.Tensor:
-        """Sample from a truncated normal distribution."""
-        return truncnorm.rvs(low, high, size=(size)).astype(np.float32)
-
-    @cache_randomness
-    def _get_transform_params(self) -> tuple:
-        """Get random transform parameters.
-
-        Args:
-            num_bboxes (int): The number of bboxes
-
-        Returns:
-            tuple:
-            - offset (np.ndarray): Offset factor of each bbox in shape (n, 2)
-            - scale (np.ndarray): Scaling factor of each bbox in shape (n, 1)
-            - rotate (np.ndarray): Rotation degree of each bbox in shape (n,)
-        """
-        random_v = self._truncnorm()
-        offset_v = random_v[:2]
-        scale_v = random_v[2:3]
-        rotate_v = random_v[3]
-
-        # Get shift parameters
-        offset = offset_v * self.shift_factor
-        offset = np.where(RNG.random(1) < self.shift_prob, offset, 0.0)
-
-        # Get scaling parameters
-        scale_min, scale_max = self.scale_factor
-        mu = (scale_max + scale_min) * 0.5
-        sigma = (scale_max - scale_min) * 0.5
-        scale = scale_v * sigma + mu
-        scale = np.where(RNG.random(1) < self.scale_prob, scale, 1.0)
-
-        # Get rotation parameters
-        rotate = rotate_v * self.rotate_factor
-        rotate = np.where(RNG.random() < self.rotate_prob, rotate, 0.0)
-
-        return offset, scale, rotate
-
-    @staticmethod
-    def _get_warp_matrix(
-        center: np.ndarray,
-        scale: np.ndarray,
-        rot: float,
-        output_size: tuple[int, int],
-        shift: tuple[float, float] = (0.0, 0.0),
-        inv: bool = False,
-        fix_aspect_ratio: bool = True,
-    ) -> np.ndarray:
-        """Calculate the affine transformation matrix that can warp the bbox area.
-
-        Args:
-            center (np.ndarray[2, ]): Center of the bounding box (x, y).
-            scale (np.ndarray[2, ]): Scale of the bounding box
-                wrt [width, height].
-            rot (float): Rotation angle (degree).
-            output_size (np.ndarray[2, ] | list(2,)): Size of the
-                destination heatmaps.
-            shift (float): Shift translation ratio wrt the width/height.
-                Default (0., 0.).
-            inv (bool): Option to inverse the affine transform direction.
-                (inv=False: src->dst or inv=True: dst->src)
-            fix_aspect_ratio (bool): Whether to fix aspect ratio during transform.
-                Defaults to True.
-
-        Returns:
-            np.ndarray: A 2x3 transformation matrix
-        """
-        if len(center) != 2 or len(scale) != 2 or len(output_size) != 2 or len(shift) != 2:
-            msg = "center, scale, output_size, and shift should have the length of 2."
-            raise ValueError(msg)
-
-        def _rotate_point(pt: np.ndarray, angle_rad: float) -> np.ndarray:
-            """Rotate a point by an angle."""
-            sn, cs = np.sin(angle_rad), np.cos(angle_rad)
-            rot_mat = np.array([[cs, -sn], [sn, cs]])
-            return rot_mat @ pt
-
-        def _get_3rd_point(a: np.ndarray, b: np.ndarray) -> np.ndarray:
-            """To calculate the affine matrix, three pairs of points are required.
-
-            This function is used to get the 3rd point, given 2D points a & b.
-
-            The 3rd point is defined by rotating vector `a - b` by 90 degrees
-            anticlockwise, using b as the rotation center.
-            """
-            direction = a - b
-            return b + np.r_[-direction[1], direction[0]]
-
-        shift = np.array(shift)
-        src_w, src_h = scale[:2]
-        dst_w, dst_h = output_size[:2]
-
-        rot_rad = np.deg2rad(rot)
-        src_dir = _rotate_point(np.array([src_w * -0.5, 0.0]), rot_rad)
-        dst_dir = np.array([dst_w * -0.5, 0.0])
-
-        src = np.zeros((3, 2), dtype=np.float32)
-        src[0, :] = center + scale * shift
-        src[1, :] = center + src_dir + scale * shift
-
-        dst = np.zeros((3, 2), dtype=np.float32)
-        dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
-        dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
-
-        if fix_aspect_ratio:
-            src[2, :] = _get_3rd_point(src[0, :], src[1, :])
-            dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
-        else:
-            src_dir_2 = _rotate_point(np.array([0.0, src_h * -0.5]), rot_rad)
-            dst_dir_2 = np.array([0.0, dst_h * -0.5])
-            src[2, :] = center + src_dir_2 + scale * shift
-            dst[2, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir_2
-
-        if inv:
-            warp_mat = cv2.getAffineTransform(np.float32(dst), np.float32(src))
-        else:
-            warp_mat = cv2.getAffineTransform(np.float32(src), np.float32(dst))
-        return warp_mat
-
-    @staticmethod
-    def _get_warp_image(
-        image: torch.Tensor | np.ndarray,
-        warp_mat: np.ndarray,
-        warp_size: tuple[int, int],
-    ) -> torch.Tensor:
-        numpy_image: np.ndarray = to_np_image(image)
-        warped_image = cv2.warpAffine(numpy_image, warp_mat, warp_size, flags=cv2.INTER_LINEAR)
-        return torch.from_numpy(warped_image).to(dtype=torch.float32).permute(2, 0, 1)
-
-    @typing.no_type_check  # TODO(ashwinvaidya17): temporary
-    def __call__(self, *_inputs: OTXSample) -> OTXSample | None:
-        """Transform function to affine image through warp matrix."""
-        assert len(_inputs) == 1, "[tmp] Multiple entity is not supported yet."  # noqa: S101
-        inputs = _inputs[0]
-
-        h, w = self.input_size
-        warp_size = (int(w), int(h))
-        apply_transforms = RNG.random()
-        ori_img_shape = inputs.img_info.ori_shape
-
-        if apply_transforms <= self.affine_transforms_prob:
-            bbox_center = np.array(ori_img_shape) / 2.0
-            bbox_scale = np.array(ori_img_shape)
-
-            offset, scale, rotate = self._get_transform_params()
-            center = bbox_center + offset * bbox_scale
-            scale = self._fix_aspect_ratio(bbox_scale * scale, aspect_ratio=w / h)
-            rot = rotate
-
-            warp_mat = self._get_warp_matrix(center, scale, rot, output_size=(w, h))
-            inputs.image = self._get_warp_image(inputs.image, warp_mat, warp_size)
-            if inputs.keypoints is not None:
-                keypoints = np.expand_dims(inputs.keypoints[:, :2], axis=0)
-                inputs.keypoints[:, :2] = torch.as_tensor(cv2.transform(keypoints, warp_mat)[0])
-
-        else:
-            resized_numpy_image = cv2.resize(
-                to_np_image(inputs.image),
-                warp_size,
-                interpolation=CV2_INTERP_CODES[self.interpolation],
-            )
-            inputs.image = torch.from_numpy(resized_numpy_image).to(dtype=torch.float32).permute(2, 0, 1)
-            if inputs.keypoints is not None:
-                scale_factor = (warp_size[0] / ori_img_shape[0], warp_size[1] / ori_img_shape[1])
-                inputs.keypoints[:, :2] = rescale_keypoints(inputs.keypoints[:, :2], scale_factor)
-
-        if inputs.keypoints is None:
-            inputs.keypoints = torch.zeros([])
-        else:
-            # update keypoints_visible after affine transforms
-            inputs.keypoints[:, 2] = inputs.keypoints[:, 2] * (inputs.keypoints[:, :2] > 0).all(axis=1)
-
-        return self.convert(inputs)
-
-    def __repr__(self) -> str:
-        """Print the basic information of the transform.
-
-        Returns:
-            str: Formatted string.
-        """
-        repr_str = self.__class__.__name__
-        repr_str += f"(input_size={self.input_size},"
-        repr_str += f"is_numpy_to_tvtensor={self.is_numpy_to_tvtensor})."
-        return repr_str
-
-
-class TorchVisionTransformLib:
-    """Helper to support TorchVision transforms (only V2) in OTX."""
-
-    @classmethod
-    def list_available_transforms(cls) -> list[type[tvt_v2.Transform]]:
-        """List available TorchVision transform (only V2) classes."""
-        return [
-            obj
-            for name in dir(tvt_v2)
-            if (obj := getattr(tvt_v2, name)) and isclass(obj) and issubclass(obj, tvt_v2.Transform)
-        ]
-
-    @classmethod
-    def generate(cls, config: SubsetConfig) -> Compose:
-        """Generate TorchVision transforms from the configuration."""
-        if isinstance(config.transforms, Compose):
-            return config.transforms
-
-        input_size = getattr(config, "input_size", None)
-        transforms = []
-        for cfg_transform in config.transforms:
-            if isinstance(cfg_transform, (dict, DictConfig)):
-                if not cfg_transform.get("enable", True):  # Optional "enable: false" flag would remove the transform
-                    continue
-                cls._configure_input_size(cfg_transform, input_size)
-            transform = cls._dispatch_transform(cfg_transform)
-            transforms.append(transform)
-
-        return Compose(transforms)
-
-    @classmethod
-    def _configure_input_size(cls, cfg_transform: dict[str, Any], input_size: int | tuple[int, int] | None) -> None:
-        """Evaluate the input_size and replace the placeholder in the init_args.
-
-        Input size should be specified as $(input_size). (e.g. $(input_size) * 0.5)
-        Only simple multiplication or division evaluation is supported. For example,
-        $(input_size) * -0.5    => supported
-        $(input_size) * 2.1 / 3 => supported
-        $(input_size) + 1       => not supported
-        The function decides to pass tuple type or int type based on the type hint of the argument.
-        float point values are rounded to int.
-        """
-        if input_size is not None:
-            _input_size: tuple[int, int] = (
-                (input_size, input_size) if isinstance(input_size, int) else tuple(input_size)  # type: ignore[assignment]
-            )
-
-        def check_type(value: Any, expected_type: Any) -> bool:  # noqa: ANN401
-            try:
-                typeguard.check_type(value, expected_type)
-            except typeguard.TypeCheckError:
-                return False
-            return True
-
-        model_cls = None
-        for key, val in cfg_transform.get("init_args", {}).items():
-            if not (isinstance(val, str) and "$(input_size)" in val):
-                continue
-            if input_size is None:
-                msg = (
-                    f"{cfg_transform['class_path'].split('.')[-1]} initial argument has `$(input_size)`, "
-                    "but input_size is set to None."
-                )
-                raise RuntimeError(msg)
-
-            if model_cls is None:
-                model_cls = import_object_from_module(cfg_transform["class_path"])
-
-            available_types = typing.get_type_hints(model_cls.__init__).get(key)
-            if available_types is None or check_type(_input_size, available_types):  # pass tuple[int, int]
-                cfg_transform["init_args"][key] = cls._eval_input_size_str(
-                    val.replace("$(input_size)", str(_input_size)),
-                )
-            elif check_type(_input_size[0], available_types):  # pass int
-                cfg_transform["init_args"][key] = cls._eval_input_size_str(
-                    val.replace("$(input_size)", str(_input_size[0])),
-                )
-            else:
-                msg = f"{key} argument should be able to get int or tuple[int, int], but it can get {available_types}"
-                raise RuntimeError(msg)
-
-    @classmethod
-    def _eval_input_size_str(cls, str_to_eval: str) -> tuple[int, ...] | int:
-        """Safe eval function for _configure_input_size.
-
-        The function is implemented for `_configure_input_size`, so implementation is aligned to it as below
-        - Only multiplication or division evaluation are supported.
-        - Only constant and tuple can be operand.
-        - tuple is changed to numpy array before evaluation.
-        - result value is rounded to int.
-        """
-        bin_ops = {
-            ast.Mult: operator.mul,
-            ast.Div: operator.truediv,
-        }
-
-        un_ops = {
-            ast.USub: operator.neg,
-            ast.UAdd: operator.pos,
-        }
-
-        available_ops = tuple(bin_ops) + tuple(un_ops) + (ast.BinOp, ast.UnaryOp)
-
-        tree = ast.parse(str_to_eval, mode="eval")
-
-        def _eval(node: Any) -> Any:  # noqa: ANN401
-            if isinstance(node, ast.Expression):
-                return _eval(node.body)
-            if isinstance(node, ast.Constant):
-                return node.value
-            if isinstance(node, ast.Tuple):
-                return np.array([_eval(val) for val in node.elts])
-            if isinstance(node, ast.BinOp) and type(node.op) in bin_ops:
-                left = _eval(node.left)
-                right = _eval(node.right)
-                return bin_ops[type(node.op)](left, right)
-            if isinstance(node, ast.UnaryOp) and type(node.op) in un_ops:
-                operand = _eval(node.operand) if isinstance(node.operand, available_ops) else node.operand.value
-                return un_ops[type(node.op)](operand)  # type: ignore[operator]
-            msg = f"Bad syntax, {type(node)}. Available operations for calcualting input size are {available_ops}"
-            raise SyntaxError(msg)
-
-        ret = _eval(tree)
-        if isinstance(ret, np.ndarray):
-            return tuple(ret.round().astype(np.int32).tolist())
-        return round(ret)
-
-    @classmethod
-    def _dispatch_transform(cls, cfg_transform: DictConfig | dict | tvt_v2.Transform) -> tvt_v2.Transform:
-        if isinstance(cfg_transform, (DictConfig, dict)):
-            transform = instantiate_class(args=(), init=cfg_transform)
-
-        elif isinstance(cfg_transform, tvt_v2.Transform):
-            transform = cfg_transform
-        else:
-            msg = (
-                "TorchVisionTransformLib accepts only three types "
-                "for config.transforms: DictConfig | dict | tvt_v2.Transform. "
-                f"However, its type is {type(cfg_transform)}."
-            )
-            raise TypeError(msg)
-
-        return transform
-
-
-class RandomIoUCrop(tvt_v2.RandomIoUCrop):
-    """Random IoU crop with the option to set probability.
-
-    Args:
-        min_scale (float, optional): the same as RandomIoUCrop. Defaults to 0.3.
-        max_scale (float, optional): the same as RandomIoUCrop. Defaults to 1.
-        min_aspect_ratio (float, optional): the same as RandomIoUCrop. Defaults to 0.5.
-        max_aspect_ratio (float, optional): the same as RandomIoUCrop. Defaults to 2.
-        sampler_options (list[float] | None, optional): the same as RandomIoUCrop. Defaults to None.
-        trials (int, optional): the same as RandomIoUCrop. Defaults to 40.
-        probability (float, optional): probability. Defaults to 1.0.
-    """
-
-    def __init__(
-        self,
-        min_scale: float = 0.3,
-        max_scale: float = 1,
-        min_aspect_ratio: float = 0.5,
-        max_aspect_ratio: float = 2,
-        sampler_options: list[float] | None = None,
-        trials: int = 40,
-        probability: float = 1.0,
-    ):
-        super().__init__(
-            min_scale,
-            max_scale,
-            min_aspect_ratio,
-            max_aspect_ratio,
-            sampler_options,
-            trials,
-        )
-        self.p = probability
-
-    def __call__(self, *inputs: Any) -> Any:  # noqa: ANN401
-        """Apply the transform to the given inputs."""
-        if torch.rand(1) >= self.p:
-            return inputs if len(inputs) > 1 else inputs[0]
-
-        return super().forward(*inputs)
diff --git a/library/src/otx/data/transform_libs/utils.py b/library/src/otx/data/transform_libs/utils.py
deleted file mode 100644
index cf104c183fe..00000000000
--- a/library/src/otx/data/transform_libs/utils.py
+++ /dev/null
@@ -1,776 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-# Copyright (c) OpenMMLab. All rights reserved.
-
-"""Utils for data transform functions."""
-
-from __future__ import annotations
-
-import copy
-import functools
-import inspect
-import weakref
-from typing import Sequence
-
-import cv2
-import numpy as np
-import torch
-from torch import BoolTensor, Tensor
-
-CV2_INTERP_CODES = {
-    "nearest": cv2.INTER_NEAREST,
-    "bilinear": cv2.INTER_LINEAR,
-    "bicubic": cv2.INTER_CUBIC,
-    "area": cv2.INTER_AREA,
-    "lanczos": cv2.INTER_LANCZOS4,
-}
-
-
-class cache_randomness:  # noqa: N801
-    """Decorator that marks the method with random return value(s) in a transform class.
-
-    Reference : https://github.com/open-mmlab/mmcv/blob/v2.1.0/mmcv/transforms/utils.py#L15-L87
-
-    This decorator is usually used together with the context-manager
-    :func`:cache_random_params`. In this context, a decorated method will
-    cache its return value(s) at the first time of being invoked, and always
-    return the cached values when being invoked again.
-
-    .. note::
-        Only an instance method can be decorated with ``cache_randomness``.
-    """
-
-    def __init__(self, func):  # noqa: ANN001
-        # Check `func` is to be bound as an instance method
-        if not inspect.isfunction(func):
-            msg = "Unsupport callable to decorate with@cache_randomness."
-            raise TypeError(msg)
-        func_args = inspect.getfullargspec(func).args
-        if len(func_args) == 0 or func_args[0] != "self":
-            msg = (
-                "@cache_randomness should only be used to decorate instance methods (the first argument is ``self``).",
-            )
-            raise TypeError(msg)
-
-        functools.update_wrapper(self, func)
-        self.func = func
-        self.instance_ref = None
-
-    def __set_name__(self, owner, name):  # noqa: ANN001
-        # Maintain a record of decorated methods in the class
-        if not hasattr(owner, "_methods_with_randomness"):
-            owner._methods_with_randomness = []  # noqa: SLF001
-
-        # Here `name` equals to `self.__name__`, i.e., the name of the
-        # decorated function, due to the invocation of `update_wrapper` in
-        # `self.__init__()`
-        owner._methods_with_randomness.append(name)  # noqa: SLF001
-
-    def __call__(self, *args, **kwargs):  # noqa: D102
-        # Get the transform instance whose method is decorated
-        # by cache_randomness
-        instance = self.instance_ref()
-        name = self.__name__
-
-        # Check the flag ``self._cache_enabled``, which should be
-        # set by the contextmanagers like ``cache_random_parameters```
-        cache_enabled = getattr(instance, "_cache_enabled", False)
-
-        if cache_enabled:
-            # Initialize the cache of the transform instances. The flag
-            # ``cache_enabled``` is set by contextmanagers like
-            # ``cache_random_params```.
-            if not hasattr(instance, "_cache"):
-                instance._cache = {}  # noqa: SLF001
-
-            if name not in instance._cache:  # noqa: SLF001
-                instance._cache[name] = self.func(instance, *args, **kwargs)  # noqa: SLF001
-            # Return the cached value
-            return instance._cache[name]  # noqa: SLF001
-
-        # Clear cache
-        if hasattr(instance, "_cache"):
-            del instance._cache  # noqa: SLF001
-        # Return function output
-        return self.func(instance, *args, **kwargs)
-
-    def __get__(self, obj, cls):  # noqa: ANN001
-        self.instance_ref = weakref.ref(obj)
-        # Return a copy to avoid multiple transform instances sharing
-        # one `cache_randomness` instance, which may cause data races
-        # in multithreading cases.
-        return copy.copy(self)
-
-
-def get_image_shape(img: np.ndarray | Tensor | list) -> tuple[int, int]:
-    """Get image(s) shape with (height, width)."""
-    if not isinstance(img, (np.ndarray, Tensor, list)):
-        msg = f"{type(img)} is not supported."
-        raise TypeError(msg)
-
-    if isinstance(img, np.ndarray):
-        return img.shape[:2]
-    if isinstance(img, Tensor):
-        return img.shape[-2:]
-    return get_image_shape(img[0])  # for list
-
-
-def to_np_image(img: np.ndarray | Tensor | list) -> np.ndarray | list[np.ndarray]:
-    """Convert torch.Tensor 3D image to numpy 3D image in HWC format.
-
-    TODO (sungchul): move it into base data entity?
-
-    """
-    if isinstance(img, np.ndarray):
-        # Check if the numpy array is in CHW format (channels should be <= 4 typically)
-        # If the first dimension is small (<=4) and smaller than other dimensions, it's likely CHW format
-        if img.ndim == 3 and img.shape[0] <= 4 and img.shape[0] < min(img.shape[1:]):
-            # Image is in CHW format, transpose to HWC
-            return np.ascontiguousarray(img.transpose(1, 2, 0))
-        return img
-    if isinstance(img, list):
-        return [to_np_image(im) for im in img]
-
-    # For tensors, check if it's already in HWC format before transposing
-    # If the last dimension is <= 4 and smaller than other dimensions, it's likely HWC format
-    if img.ndim == 3 and img.shape[-1] <= 4 and img.shape[-1] < min(img.shape[:-1]):
-        # Already HWC format, just convert to numpy
-        return np.ascontiguousarray(img.numpy())
-    # CHW format, transpose to HWC
-    return np.ascontiguousarray(img.numpy().transpose(1, 2, 0))
-
-
-def rescale_bboxes(boxes: Tensor, scale_factor: tuple[float, float]) -> Tensor:
-    """Rescale boxes w.r.t. rescale_factor in-place.
-
-    Note:
-        Both ``rescale_`` and ``resize_`` will enlarge or shrink boxes
-        w.r.t ``scale_facotr``. The difference is that ``resize_`` only
-        changes the width and the height of boxes, but ``rescale_`` also
-        rescales the box centers simultaneously.
-
-    Args:
-        boxes (Tensor): bounding boxes to be rescaled.
-        scale_factor (tuple[float, float]): factors for scaling boxes with (height, width).
-            It will be used after flipped. The length should be 2.
-
-    Returns:
-        (Tensor): rescaled bounding boxes.
-    """
-    assert len(scale_factor) == 2  # noqa: S101
-    scale_factor = boxes.new_tensor(scale_factor[::-1]).repeat(2)
-    return boxes * scale_factor
-
-
-def rescale_masks(
-    masks: np.ndarray,
-    scale_factor: float | tuple[float, float],  # (H, W)
-    interpolation: str = "nearest",
-) -> np.ndarray:
-    """Rescale masks as large as possible while keeping the aspect ratio.
-
-    Args:
-        masks (np.ndarray): Masks to be rescaled.
-        scale_factor (float | tuple[float, float]): Scale factor to be applied to masks with (height, width).
-        interpolation (str): Interpolation mode. Defaults to `nearest`.
-
-    Returns:
-        (np.ndarray) : The rescaled masks.
-    """
-    h, w = masks.shape[1:]
-    new_size = rescale_size((h, w), scale_factor)  # (H, W)
-
-    # flipping `new_size` is required because cv2.resize uses (W, H)
-    return np.stack(
-        [cv2.resize(mask, new_size[::-1], interpolation=CV2_INTERP_CODES[interpolation]) for mask in masks],
-    )
-
-
-def rescale_keypoints(keypoints: Tensor, scale_factor: float | tuple[float, float]) -> Tensor:
-    """Rescale keypoints as large as possible while keeping the aspect ratio.
-
-    Args:
-        keypoints (Tensor): Keypoints to be rescaled.
-        scale_factor (float | tuple[float, float]): Scale factor to be applied to keypoints with (height, width)
-            or single float value.
-
-    Returns:
-        (Tensor): The rescaled keypoints.
-    """
-    if isinstance(scale_factor, float):
-        w_scale = h_scale = scale_factor
-    else:
-        h_scale, w_scale = scale_factor
-
-    keypoints[:, 0] *= w_scale
-    keypoints[:, 1] *= h_scale
-    return keypoints
-
-
-def translate_bboxes(boxes: Tensor, distances: Sequence[float]) -> Tensor:
-    """Translate boxes in-place.
-
-    Args:
-        boxes (Tensor): Bounding boxes to be translated.
-        distances (Sequence[float]): Translate distances. The first
-            is horizontal distance and the second is vertical distance.
-
-    Returns:
-        (Tensor): Translated bounding boxes.
-    """
-    assert len(distances) == 2  # noqa: S101
-    return boxes + boxes.new_tensor(distances).repeat(2)
-
-
-def translate_masks(
-    masks: np.ndarray,
-    out_shape: tuple[int, int],
-    offset: int | float,
-    direction: str = "horizontal",
-    border_value: int | tuple[int] = 0,
-    interpolation: str = "bilinear",
-) -> np.ndarray:
-    """Translate the masks.
-
-    Args:
-        masks (np.ndarray): Masks to be translated.
-        out_shape (tuple[int]): Shape for output mask, format (h, w).
-        offset (int | float): The offset for translate.
-        direction (str): The translate direction, either "horizontal" or "vertical".
-        border_value (int | tuple[int]): Border value. Default 0 for masks.
-        interpolation (str): Interpolation method, accepted values are
-            'nearest', 'bilinear', 'bicubic', 'area', 'lanczos'. Defaults to
-            'bilinear'.
-
-    Returns:
-        (np.ndarray): Translated BitmapMasks.
-    """
-    dtype = masks.dtype
-    if masks.shape[-2:] != out_shape:
-        empty_masks = np.zeros((masks.shape[0], *out_shape), dtype=dtype)
-        min_h = min(out_shape[0], masks.shape[1])
-        min_w = min(out_shape[1], masks.shape[2])
-        empty_masks[:, :min_h, :min_w] = masks[:, :min_h, :min_w]
-        masks = empty_masks
-
-    # from https://github.com/open-mmlab/mmcv/blob/v2.1.0/mmcv/image/geometric.py#L740-L788
-    height, width = masks.shape[1:]
-    if masks.ndim == 2:
-        channels = 1
-    elif masks.ndim == 3:
-        channels = masks.shape[0]
-
-    if isinstance(border_value, int):
-        border_value = tuple([border_value] * channels)  # type: ignore[assignment]
-    elif isinstance(border_value, tuple):
-        assert len(border_value) == channels, (  # noqa: S101
-            "Expected the num of elements in tuple equals the channels"
-            f"of input image. Found {len(border_value)} vs {channels}"
-        )
-    else:
-        msg = f"Invalid type {type(border_value)} for `border_value`."
-        raise ValueError(msg)  # noqa: TRY004
-
-    translate_matrix = _get_translate_matrix(offset, direction)
-    translated_masks = cv2.warpAffine(
-        masks.transpose((1, 2, 0)),
-        translate_matrix,
-        (width, height),
-        # Note case when the number elements in `border_value`
-        # greater than 3 (e.g. translating masks whose channels
-        # large than 3) will raise TypeError in `cv2.warpAffine`.
-        # Here simply slice the first 3 values in `border_value`.
-        borderValue=border_value[:3],  # type: ignore[index]
-        flags=CV2_INTERP_CODES[interpolation],
-    )
-
-    if translated_masks.ndim == 2:
-        translated_masks = translated_masks[:, :, None]
-    return translated_masks.transpose((2, 0, 1)).astype(dtype)
-
-
-def _get_translate_matrix(offset: int | float, direction: str = "horizontal") -> np.ndarray:
-    """Generate the translate matrix.
-
-    Args:
-        offset (int | float): The offset used for translate.
-        direction (str): The translate direction, either
-            "horizontal" or "vertical".
-
-    Returns:
-        ndarray: The translate matrix with dtype float32.
-    """
-    if direction == "horizontal":
-        translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]])
-    elif direction == "vertical":
-        translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]])
-    return translate_matrix
-
-
-def clip_bboxes(boxes: Tensor, img_shape: tuple[int, int]) -> Tensor:
-    """Clip boxes according to the image shape in-place.
-
-    Args:
-        img_shape (tuple[int, int]): A tuple of image height and width.
-
-    Returns:
-        (Tensor): Clipped boxes.
-    """
-    h, w = img_shape
-    boxes[..., 0::2] = boxes[..., 0::2].clamp(0, w)
-    boxes[..., 1::2] = boxes[..., 1::2].clamp(0, h)
-    return boxes
-
-
-def is_inside_bboxes(
-    boxes: Tensor,
-    img_shape: tuple[int, int],
-    all_inside: bool = False,
-    allowed_border: int = 0,
-) -> BoolTensor:
-    """Find boxes inside the image.
-
-    Args:
-        boxes (Tensor): Bounding boxes to be checked.
-        img_shape (tuple[int, int]): A tuple of image height and width.
-        all_inside (bool): Whether the boxes are all inside the image or
-            part inside the image. Defaults to False.
-        allowed_border (int): Boxes that extend beyond the image shape
-            boundary by more than ``allowed_border`` are considered
-            "outside" Defaults to 0.
-
-    Returns:
-        (BoolTensor): A BoolTensor indicating whether the box is inside
-            the image. Assuming the original boxes have shape (m, n, 4),
-            the output has shape (m, n).
-    """
-    img_h, img_w = img_shape
-    if all_inside:
-        return (
-            (boxes[:, 0] >= -allowed_border)
-            & (boxes[:, 1] >= -allowed_border)
-            & (boxes[:, 2] < img_w + allowed_border)
-            & (boxes[:, 3] < img_h + allowed_border)
-        )
-    return (
-        (boxes[..., 0] < img_w + allowed_border)
-        & (boxes[..., 1] < img_h + allowed_border)
-        & (boxes[..., 2] > -allowed_border)
-        & (boxes[..., 3] > -allowed_border)
-    )
-
-
-def flip_bboxes(boxes: Tensor, img_shape: tuple[int, int], direction: str = "horizontal") -> Tensor:
-    """Flip boxes horizontally or vertically in-place.
-
-    Args:
-        boxes (Tensor): Bounding boxes to be flipped.
-        img_shape (Tuple[int, int]): A tuple of image height and width.
-        direction (str): Flip direction, options are "horizontal",
-            "vertical" and "diagonal". Defaults to "horizontal"
-
-    Returns:
-        (Tensor): Flipped bounding boxes.
-    """
-    assert direction in ["horizontal", "vertical", "diagonal"]  # noqa: S101
-    flipped = boxes.clone()
-    if direction == "horizontal":
-        flipped[..., 0] = img_shape[1] - boxes[..., 2]
-        flipped[..., 2] = img_shape[1] - boxes[..., 0]
-    elif direction == "vertical":
-        flipped[..., 1] = img_shape[0] - boxes[..., 3]
-        flipped[..., 3] = img_shape[0] - boxes[..., 1]
-    else:
-        flipped[..., 0] = img_shape[1] - boxes[..., 2]
-        flipped[..., 1] = img_shape[0] - boxes[..., 3]
-        flipped[..., 2] = img_shape[1] - boxes[..., 0]
-        flipped[..., 3] = img_shape[0] - boxes[..., 1]
-    return flipped
-
-
-def overlap_bboxes(
-    bboxes1: Tensor,
-    bboxes2: Tensor,
-    mode: str = "iou",
-    is_aligned: bool = False,
-    eps: float = 1e-6,
-) -> Tensor:
-    """Calculate overlap between two set of bboxes.
-
-    FP16 Contributed by https://github.com/open-mmlab/mmdetection/pull/4889
-    Note:
-        Assume bboxes1 is M x 4, bboxes2 is N x 4, when mode is 'iou',
-        there are some new generated variable when calculating IOU
-        using overlap_bboxes function:
-
-        1) is_aligned is False
-            area1: M x 1
-            area2: N x 1
-            lt: M x N x 2
-            rb: M x N x 2
-            wh: M x N x 2
-            overlap: M x N x 1
-            union: M x N x 1
-            ious: M x N x 1
-
-            Total memory:
-                S = (9 x N x M + N + M) * 4 Byte,
-
-            When using FP16, we can reduce:
-                R = (9 x N x M + N + M) * 4 / 2 Byte
-                R large than (N + M) * 4 * 2 is always true when N and M >= 1.
-                Obviously, N + M <= N * M < 3 * N * M, when N >=2 and M >=2,
-                           N + 1 < 3 * N, when N or M is 1.
-
-            Given M = 40 (ground truth), N = 400000 (three anchor boxes
-            in per grid, FPN, R-CNNs),
-                R = 275 MB (one times)
-
-            A special case (dense detection), M = 512 (ground truth),
-                R = 3516 MB = 3.43 GB
-
-            When the batch size is B, reduce:
-                B x R
-
-            Therefore, CUDA memory runs out frequently.
-
-            Experiments on GeForce RTX 2080Ti (11019 MiB):
-
-            |   dtype   |   M   |   N   |   Use    |   Real   |   Ideal   |
-            |:----:|:----:|:----:|:----:|:----:|:----:|
-            |   FP32   |   512 | 400000 | 8020 MiB |   --   |   --   |
-            |   FP16   |   512 | 400000 |   4504 MiB | 3516 MiB | 3516 MiB |
-            |   FP32   |   40 | 400000 |   1540 MiB |   --   |   --   |
-            |   FP16   |   40 | 400000 |   1264 MiB |   276MiB   | 275 MiB |
-
-        2) is_aligned is True
-            area1: N x 1
-            area2: N x 1
-            lt: N x 2
-            rb: N x 2
-            wh: N x 2
-            overlap: N x 1
-            union: N x 1
-            ious: N x 1
-
-            Total memory:
-                S = 11 x N * 4 Byte
-
-            When using FP16, we can reduce:
-                R = 11 x N * 4 / 2 Byte
-
-        So do the 'giou' (large than 'iou').
-
-        Time-wise, FP16 is generally faster than FP32.
-
-        When gpu_assign_thr is not -1, it takes more time on cpu
-        but not reduce memory.
-        There, we can reduce half the memory and keep the speed.
-
-    If ``is_aligned`` is ``False``, then calculate the overlaps between each
-    bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
-    pair of bboxes1 and bboxes2.
-
-    Args:
-        bboxes1 (Tensor): shape (B, m, 4) in <x1, y1, x2, y2> format or empty.
-        bboxes2 (Tensor): shape (B, n, 4) in <x1, y1, x2, y2> format or empty.
-            B indicates the batch dim, in shape (B1, B2, ..., Bn).
-            If ``is_aligned`` is ``True``, then m and n must be equal.
-        mode (str): "iou" (intersection over union), "iof" (intersection over
-            foreground) or "giou" (generalized intersection over union).
-            Default "iou".
-        is_aligned (bool, optional): If True, then m and n must be equal.
-            Default False.
-        eps (float, optional): A value added to the denominator for numerical
-            stability. Default 1e-6.
-
-    Returns:
-        Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)
-
-    Example:
-        >>> bboxes1 = torch.FloatTensor([
-        >>>     [0, 0, 10, 10],
-        >>>     [10, 10, 20, 20],
-        >>>     [32, 32, 38, 42],
-        >>> ])
-        >>> bboxes2 = torch.FloatTensor([
-        >>>     [0, 0, 10, 20],
-        >>>     [0, 10, 10, 19],
-        >>>     [10, 10, 20, 20],
-        >>> ])
-        >>> overlaps = overlap_bboxes(bboxes1, bboxes2)
-        >>> assert overlaps.shape == (3, 3)
-        >>> overlaps = overlap_bboxes(bboxes1, bboxes2, is_aligned=True)
-        >>> assert overlaps.shape == (3, )
-
-    Example:
-        >>> empty = torch.empty(0, 4)
-        >>> nonempty = torch.FloatTensor([[0, 0, 10, 9]])
-        >>> assert tuple(overlap_bboxes(empty, nonempty).shape) == (0, 1)
-        >>> assert tuple(overlap_bboxes(nonempty, empty).shape) == (1, 0)
-        >>> assert tuple(overlap_bboxes(empty, empty).shape) == (0, 0)
-    """
-    assert mode in ["iou", "iof", "giou"], f"Unsupported mode {mode}"  # noqa: S101
-    # Either the boxes are empty or the length of boxes' last dimension is 4
-    assert bboxes1.size(-1) == 4 or bboxes1.size(0) == 0  # noqa: S101
-    assert bboxes2.size(-1) == 4 or bboxes2.size(0) == 0  # noqa: S101
-
-    # Batch dim must be the same
-    # Batch dim: (B1, B2, ... Bn)
-    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]  # noqa: S101
-    batch_shape = bboxes1.shape[:-2]
-
-    rows = bboxes1.size(-2)
-    cols = bboxes2.size(-2)
-    if is_aligned:
-        assert rows == cols  # noqa: S101
-
-    if rows * cols == 0:
-        if is_aligned:
-            return bboxes1.new((*batch_shape, rows))
-        return bboxes1.new((*batch_shape, rows, cols))
-
-    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
-    area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
-
-    if is_aligned:
-        lt = torch.max(bboxes1[..., :2], bboxes2[..., :2])  # [B, rows, 2]
-        rb = torch.min(bboxes1[..., 2:], bboxes2[..., 2:])  # [B, rows, 2]
-
-        wh = fp16_clamp(rb - lt, min=0)
-        overlap = wh[..., 0] * wh[..., 1]
-
-        union = area1 + area2 - overlap if mode in ["iou", "giou"] else area1
-        if mode == "giou":
-            enclosed_lt = torch.min(bboxes1[..., :2], bboxes2[..., :2])
-            enclosed_rb = torch.max(bboxes1[..., 2:], bboxes2[..., 2:])
-    else:
-        lt = torch.max(bboxes1[..., :, None, :2], bboxes2[..., None, :, :2])  # [B, rows, cols, 2]
-        rb = torch.min(bboxes1[..., :, None, 2:], bboxes2[..., None, :, 2:])  # [B, rows, cols, 2]
-
-        wh = fp16_clamp(rb - lt, min=0)
-        overlap = wh[..., 0] * wh[..., 1]
-
-        union = area1[..., None] + area2[..., None, :] - overlap if mode in ["iou", "giou"] else area1[..., None]
-        if mode == "giou":
-            enclosed_lt = torch.min(bboxes1[..., :, None, :2], bboxes2[..., None, :, :2])
-            enclosed_rb = torch.max(bboxes1[..., :, None, 2:], bboxes2[..., None, :, 2:])
-
-    eps = union.new_tensor([eps])
-    union = torch.max(union, eps)
-    ious = overlap / union
-    if mode in ["iou", "iof"]:
-        return ious
-    # calculate gious
-    enclose_wh = fp16_clamp(enclosed_rb - enclosed_lt, min=0)
-    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
-    enclose_area = torch.max(enclose_area, eps)
-    return ious - (enclose_area - union) / enclose_area
-
-
-def centers_bboxes(boxes: Tensor) -> Tensor:
-    """Return a tensor representing the centers of boxes."""
-    return (boxes[..., :2] + boxes[..., 2:]) / 2
-
-
-def fp16_clamp(x: Tensor, min: float | None = None, max: float | None = None) -> Tensor:  # noqa: A002
-    """Clamp fp16 tensor."""
-    if not x.is_cuda and x.dtype == torch.float16:
-        # clamp for cpu float16, tensor fp16 has no clamp implementation
-        return x.float().clamp(min, max).half()
-
-    return x.clamp(min, max)
-
-
-def scale_size(
-    size: tuple[int, int],
-    scale: float | int | tuple[float, float] | tuple[int, int],
-) -> tuple[int, int]:
-    """Rescale a size by a ratio.
-
-    Args:
-        size (tuple[int]): (height, width).
-        scale (float | int | tuple(float) | tuple(int)): Scaling factor with (height, width).
-
-    Returns:
-        tuple[int]: scaled size with (height, width).
-    """
-    if isinstance(scale, (float, int)):
-        scale = (scale, scale)
-    h, w = size
-    return int(h * float(scale[0]) + 0.5), int(w * float(scale[1]) + 0.5)
-
-
-def rescale_size(
-    old_size: tuple,
-    scale: float | int | tuple[float, float] | tuple[int, int],
-    return_scale: bool = False,
-) -> tuple[int, int] | tuple[tuple[int, int], float | int]:
-    """Calculate the new size to be rescaled to.
-
-    Args:
-        old_size (tuple[int]): The old size (height, width) of image.
-        scale (float | int | tuple[float] | tuple[int]): The scaling factor or maximum size.
-            If it is a float number, an integer, or a tuple of 2 float numbers,
-            then the image will be rescaled by this factor, else if it is a tuple of 2 integers,
-            then the image will be rescaled as large as possible within the scale.
-        return_scale (bool): Whether to return the scaling factor besides the rescaled image size.
-
-    Returns:
-        tuple[int]: The new rescaled image size with (height, width).
-            If return_scale is True, scale_factor obtained again will be returned as well.
-    """
-    h, w = old_size
-    msg = ""
-    if isinstance(scale, (float, int)):
-        if scale <= 0:
-            msg = f"Invalid scale {scale}, must be positive."
-            raise ValueError(msg)
-        scale_factor = scale
-    elif isinstance(scale, tuple):
-        if isinstance(scale[0], int):
-            max_long_edge = max(scale)
-            max_short_edge = min(scale)
-            scale_factor = min(max_long_edge / max(h, w), max_short_edge / min(h, w))
-        elif isinstance(scale[0], float):
-            scale_factor = scale  # type: ignore[assignment]
-        else:
-            msg = f"Scale must be a number or tuple of int/float, but got tuple of {type(scale[0])}"
-    else:
-        msg = f"Scale must be a number or tuple of int/float, but got {type(scale)}"
-
-    if msg:
-        raise TypeError(msg)
-
-    new_size = scale_size((h, w), scale_factor)
-
-    if return_scale:
-        return new_size, scale_factor
-    return new_size
-
-
-def flip_image(img: np.ndarray | list[np.ndarray], direction: str = "horizontal") -> np.ndarray | list[np.ndarray]:
-    """Flip an image horizontally or vertically.
-
-    Args:
-        img (ndarray): Image to be flipped.
-        direction (str): The flip direction, either "horizontal" or
-            "vertical" or "diagonal".
-
-    Returns:
-        ndarray: The flipped image.
-    """
-    if direction not in ["horizontal", "vertical", "diagonal"]:
-        msg = f"direction (={direction}) should be in one of ('horizontal', 'vertical', 'diagonal')."
-        raise ValueError(msg)
-
-    if isinstance(img, list):
-        return [flip_image(im, direction) for im in img]
-
-    if direction == "horizontal":
-        return np.flip(img, axis=1)
-    elif direction == "vertical":  # noqa: RET505
-        return np.flip(img, axis=0)
-    else:
-        return np.flip(img, axis=(0, 1))
-
-
-def flip_masks(masks: np.ndarray, direction: str = "horizontal") -> np.ndarray:
-    """Flip masks alone the given direction."""
-    assert direction in ("horizontal", "vertical", "diagonal")  # noqa: S101
-
-    return np.stack([flip_image(mask, direction=direction) for mask in masks])
-
-
-def project_bboxes(boxes: Tensor, homography_matrix: Tensor | np.ndarray) -> Tensor:
-    """Geometric transformat boxes in-place.
-
-    Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/structures/bbox/horizontal_boxes.py#L184-L202
-
-    Args:
-        homography_matrix (Tensor or np.ndarray]):
-            Shape (3, 3) for geometric transformation.
-
-    Returns:
-        (Tensor | np.ndarray): Projected bounding boxes.
-    """
-    if isinstance(homography_matrix, np.ndarray):
-        homography_matrix = boxes.new_tensor(homography_matrix)
-    corners = hbox2corner(boxes)
-    corners = torch.cat([corners, corners.new_ones(*corners.shape[:-1], 1)], dim=-1)
-    corners_t = torch.transpose(corners, -1, -2)
-    corners_t = torch.matmul(homography_matrix, corners_t)
-    corners = torch.transpose(corners_t, -1, -2)
-    # Convert to homogeneous coordinates by normalization
-    corners = corners[..., :2] / corners[..., 2:3]
-    return corner2hbox(corners)
-
-
-def hbox2corner(boxes: Tensor) -> Tensor:
-    """Convert box coordinates from (x1, y1, x2, y2) to corners ((x1, y1), (x2, y1), (x1, y2), (x2, y2)).
-
-    Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/structures/bbox/horizontal_boxes.py#L204-L217
-
-    Args:
-        boxes (Tensor): Horizontal box tensor with shape of (..., 4).
-
-    Returns:
-        Tensor: Corner tensor with shape of (..., 4, 2).
-    """
-    x1, y1, x2, y2 = torch.split(boxes, 1, dim=-1)
-    corners = torch.cat([x1, y1, x2, y1, x1, y2, x2, y2], dim=-1)
-    return corners.reshape(*corners.shape[:-1], 4, 2)
-
-
-def corner2hbox(corners: Tensor) -> Tensor:
-    """Convert box coordinates from corners ((x1, y1), (x2, y1), (x1, y2), (x2, y2)) to (x1, y1, x2, y2).
-
-    Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/structures/bbox/horizontal_boxes.py#L219-L234
-
-    Args:
-        corners (Tensor): Corner tensor with shape of (..., 4, 2).
-
-    Returns:
-        Tensor: Horizontal box tensor with shape of (..., 4).
-    """
-    if corners.numel() == 0:
-        return corners.new_zeros((0, 4))
-    min_xy = corners.min(dim=-2)[0]
-    max_xy = corners.max(dim=-2)[0]
-    return torch.cat([min_xy, max_xy], dim=-1)
-
-
-def crop_masks(masks: np.ndarray, bbox: np.ndarray) -> np.ndarray:
-    """Crop each mask by the given bbox."""
-    assert isinstance(bbox, np.ndarray)  # noqa: S101
-    assert bbox.ndim == 1  # noqa: S101
-
-    height, width = masks.shape[1:]
-
-    # clip the boundary
-    bbox = bbox.copy()
-    bbox[0::2] = np.clip(bbox[0::2], 0, width)
-    bbox[1::2] = np.clip(bbox[1::2], 0, height)
-    x1, y1, x2, y2 = bbox
-    w = np.maximum(x2 - x1, 1)
-    h = np.maximum(y2 - y1, 1)
-
-    return masks[:, y1 : y1 + h, x1 : x1 + w]
-
-
-def get_bboxes_from_masks(masks: Tensor) -> np.ndarray:
-    """Create boxes from masks."""
-    num_masks = len(masks)
-    bboxes = np.zeros((num_masks, 4), dtype=np.float32)
-
-    x_any = masks.any(axis=1)
-    y_any = masks.any(axis=2)
-    for idx in range(num_masks):
-        x = np.where(x_any[idx, :])[0]
-        y = np.where(y_any[idx, :])[0]
-        if len(x) > 0 and len(y) > 0:
-            # use +1 for x_max and y_max so that the right and bottom
-            # boundary of instance masks are fully included by the box
-            bboxes[idx, :] = np.array([x[0], y[0], x[-1] + 1, y[-1] + 1], dtype=np.float32)
-    return bboxes
diff --git a/library/src/otx/data/utils/utils.py b/library/src/otx/data/utils/utils.py
index 3fcb9b38ca4..d9980d7ca3c 100644
--- a/library/src/otx/data/utils/utils.py
+++ b/library/src/otx/data/utils/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Utility functions for the data module."""
@@ -41,7 +41,9 @@ def instantiate_sampler(sampler_config: SamplerConfig, dataset: Dataset, **kwarg
     init_signature = list(inspect.signature(sampler_class.__init__).parameters.keys())
     if "batch_size" not in init_signature:
         kwargs.pop("batch_size", None)
-    sampler_kwargs = {**sampler_config.init_args, **kwargs}
+    # init_args is None when the recipe sets `init_args: null`; fall back to an empty dict so ** unpacking works.
+    init_args = sampler_config.init_args or {}
+    sampler_kwargs = {**init_args, **kwargs}
     return sampler_class(dataset, **sampler_kwargs)
 
 
diff --git a/library/src/otx/recipe/_base_/data/classification.yaml b/library/src/otx/recipe/_base_/data/classification.yaml
index 1f3fb87eee9..a497ef9de35 100644
--- a/library/src/otx/recipe/_base_/data/classification.yaml
+++ b/library/src/otx/recipe/_base_/data/classification.yaml
@@ -5,119 +5,85 @@ input_size:
 unannotated_items_ratio: 0.0
 train_subset:
   subset_name: train
-  transform_lib_type: TORCHVISION
   batch_size: 64
   num_workers: 2
-  to_tv_image: false
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.RandomResizedCrop
+  augmentations_cpu:
+    - class_path: torchvision.transforms.v2.RandomResizedCrop
       init_args:
-        scale: $(input_size)
-        crop_ratio_range:
+        size: $(input_size)
+        scale:
           - 0.08
           - 1.0
-        aspect_ratio_range:
+        ratio:
           - 0.75
           - 1.34
-    - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-      enable: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.RandomHorizontalFlip
       init_args:
-        brightness:
-          - 0.875
-          - 1.125
-        contrast:
-          - 0.5
-          - 1.5
-        saturation:
-          - 0.5
-          - 1.5
-        hue:
-          - -0.05
-          - 0.05
         p: 0.5
-    - class_path: otx.data.transform_libs.torchvision.RandomAffine
-      enable: false
-      init_args:
-        max_rotate_degree: 10.0
-        max_translate_ratio: 0.1
-        scaling_ratio_range:
-          - 0.5
-          - 1.5
-        max_shear_degree: 2.0
-    - class_path: otx.data.transform_libs.torchvision.RandomFlip
-      enable: true
-      init_args:
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.RandomVerticalFlip
-      enable: false
+    - class_path: kornia.augmentation.ColorJiggle
       init_args:
+        brightness: 0.2
+        contrast: 0.2
+        saturation: 0.2
+        hue: 0.05
         p: 0.5
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-      enable: false
-      init_args:
-        kernel_size: 5
-        sigma:
-          - 0.1
-          - 2.0
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.ToDtype
-      init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-        scale: false
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-      enable: false
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        mean: 0.0
-        sigma: 0.1
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.Normalize
-      init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: otx.data.samplers.balanced_sampler.BalancedSampler
     init_args: null
 
 val_subset:
   subset_name: val
-  transform_lib_type: TORCHVISION
   batch_size: 64
   num_workers: 2
-  to_tv_image: false
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        scale: $(input_size)
-    - class_path: torchvision.transforms.v2.ToDtype
+  augmentations_cpu:
+    - class_path: torchvision.transforms.v2.Resize
       init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-        scale: false
-    - class_path: torchvision.transforms.v2.Normalize
+        size: $(input_size)
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
 
 test_subset:
   subset_name: test
-  transform_lib_type: TORCHVISION
   batch_size: 64
   num_workers: 2
-  to_tv_image: false
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        scale: $(input_size)
-    - class_path: torchvision.transforms.v2.ToDtype
+  augmentations_cpu:
+    - class_path: torchvision.transforms.v2.Resize
       init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-        scale: false
-    - class_path: torchvision.transforms.v2.Normalize
+        size: $(input_size)
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
diff --git a/library/src/otx/recipe/_base_/data/detection.yaml b/library/src/otx/recipe/_base_/data/detection.yaml
index 184567221ed..81ee6cef326 100644
--- a/library/src/otx/recipe/_base_/data/detection.yaml
+++ b/library/src/otx/recipe/_base_/data/detection.yaml
@@ -3,115 +3,84 @@ input_size:
   - 800
   - 992
 unannotated_items_ratio: 0.0
+
 train_subset:
   subset_name: train
-  transform_lib_type: TORCHVISION
-  batch_size: 1
-  num_workers: 2
-  to_tv_image: false
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop
-      enable: true
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        scale: $(input_size)
-        transform_bbox: true
-    - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-      enable: false
+  batch_size: 8
+  num_workers: 4
+  augmentations_cpu:
+    - class_path: torchvision.transforms.v2.RandomIoUCrop
+    - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
       init_args:
-        brightness:
-          - 0.875
-          - 1.125
-        contrast:
-          - 0.5
-          - 1.5
-        saturation:
-          - 0.5
-          - 1.5
-        hue:
-          - -0.05
-          - 0.05
-        p: 0.5
-    - class_path: otx.data.transform_libs.torchvision.RandomAffine
-      enable: false
+        min_size: 1
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
-        max_rotate_degree: 10.0
-        max_translate_ratio: 0.1
-        scaling_ratio_range:
-          - 0.5
-          - 1.5
-        max_shear_degree: 2.0
-    - class_path: otx.data.transform_libs.torchvision.RandomFlip
-      enable: true
-      init_args:
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.RandomVerticalFlip
-      enable: false
+        size: $(input_size)
+        keep_aspect_ratio: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.RandomHorizontalFlip
       init_args:
         p: 0.5
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-      enable: false
-      init_args:
-        kernel_size: 5
-        sigma:
-          - 0.1
-          - 2.0
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.ToDtype
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-      enable: false
-      init_args:
-        mean: 0.0
-        sigma: 0.1
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.Normalize
-      init_args:
-        mean: [0.0, 0.0, 0.0]
-        std: [255.0, 255.0, 255.0]
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
 
 val_subset:
   subset_name: val
-  transform_lib_type: TORCHVISION
-  batch_size: 1
-  num_workers: 2
-  to_tv_image: false
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
+  batch_size: 8
+  num_workers: 4
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
-        scale: $(input_size)
-    - class_path: torchvision.transforms.v2.ToDtype
+        size: $(input_size)
+        keep_aspect_ratio: false
+        resize_targets: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: torchvision.transforms.v2.Normalize
-      init_args:
-        mean: [0.0, 0.0, 0.0]
-        std: [255.0, 255.0, 255.0]
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
 
 test_subset:
   subset_name: test
-  transform_lib_type: TORCHVISION
-  batch_size: 1
-  num_workers: 2
-  to_tv_image: false
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        scale: $(input_size)
-    - class_path: torchvision.transforms.v2.ToDtype
+  batch_size: 8
+  num_workers: 4
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: torchvision.transforms.v2.Normalize
+        size: $(input_size)
+        keep_aspect_ratio: false
+        resize_targets: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        mean: [0.0, 0.0, 0.0]
-        std: [255.0, 255.0, 255.0]
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
diff --git a/library/src/otx/recipe/_base_/data/detection_tile.yaml b/library/src/otx/recipe/_base_/data/detection_tile.yaml
index 5d161f19e95..c3c76a2d0c4 100644
--- a/library/src/otx/recipe/_base_/data/detection_tile.yaml
+++ b/library/src/otx/recipe/_base_/data/detection_tile.yaml
@@ -6,117 +6,82 @@ unannotated_items_ratio: 0.0
 tile_config:
   enable_tiler: true
   enable_adaptive_tiling: true
+
 train_subset:
   subset_name: train
-  transform_lib_type: TORCHVISION
   batch_size: 1
   num_workers: 2
-  to_tv_image: false
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop
+  augmentations_cpu:
+    - class_path: torchvision.transforms.v2.RandomIoUCrop
       enable: true
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        scale: $(input_size)
-        keep_ratio: false
-        transform_bbox: true
-    - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-      enable: false
-      init_args:
-        brightness:
-          - 0.875
-          - 1.125
-        contrast:
-          - 0.5
-          - 1.5
-        saturation:
-          - 0.5
-          - 1.5
-        hue:
-          - -0.05
-          - 0.05
-        p: 0.5
-    - class_path: otx.data.transform_libs.torchvision.RandomAffine
-      enable: false
-      init_args:
-        max_rotate_degree: 10.0
-        max_translate_ratio: 0.1
-        scaling_ratio_range:
-          - 0.5
-          - 1.5
-        max_shear_degree: 2.0
-    - class_path: otx.data.transform_libs.torchvision.RandomFlip
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.RandomVerticalFlip
-      enable: false
+        size: $(input_size)
+        keep_aspect_ratio: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.RandomHorizontalFlip
       init_args:
         p: 0.5
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-      enable: false
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        kernel_size: 5
-        sigma:
-          - 0.1
-          - 2.0
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.ToDtype
-      init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-      enable: false
-      init_args:
-        mean: 0.0
-        sigma: 0.1
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.Normalize
-      init_args:
-        mean: [0.0, 0.0, 0.0]
-        std: [255.0, 255.0, 255.0]
+        mean:
+          - 0.0
+          - 0.0
+          - 0.0
+        std:
+          - 1.0
+          - 1.0
+          - 1.0
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
 
 val_subset:
   subset_name: val
-  transform_lib_type: TORCHVISION
   batch_size: 1
   num_workers: 2
-  to_tv_image: false
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
-        scale: $(input_size)
-        keep_ratio: false
-    - class_path: torchvision.transforms.v2.ToDtype
+        size: $(input_size)
+        keep_aspect_ratio: false
+        resize_targets: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: torchvision.transforms.v2.Normalize
-      init_args:
-        mean: [0.0, 0.0, 0.0]
-        std: [255.0, 255.0, 255.0]
+        mean:
+          - 0.0
+          - 0.0
+          - 0.0
+        std:
+          - 1.0
+          - 1.0
+          - 1.0
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
 
 test_subset:
   subset_name: test
-  transform_lib_type: TORCHVISION
   batch_size: 1
   num_workers: 2
-  to_tv_image: false
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        scale: $(input_size)
-        keep_ratio: false
-    - class_path: torchvision.transforms.v2.ToDtype
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: torchvision.transforms.v2.Normalize
+        size: $(input_size)
+        keep_aspect_ratio: false
+        resize_targets: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        mean: [0.0, 0.0, 0.0]
-        std: [255.0, 255.0, 255.0]
+        mean:
+          - 0.0
+          - 0.0
+          - 0.0
+        std:
+          - 1.0
+          - 1.0
+          - 1.0
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
diff --git a/library/src/otx/recipe/_base_/data/instance_segmentation.yaml b/library/src/otx/recipe/_base_/data/instance_segmentation.yaml
index 86158503ccd..734ac26c091 100644
--- a/library/src/otx/recipe/_base_/data/instance_segmentation.yaml
+++ b/library/src/otx/recipe/_base_/data/instance_segmentation.yaml
@@ -3,131 +3,80 @@ input_size:
   - 1024
   - 1024
 unannotated_items_ratio: 0.0
+
 train_subset:
   subset_name: train
-  transform_lib_type: TORCHVISION
-  batch_size: 1
-  num_workers: 2
-  to_tv_image: true
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
+  batch_size: 8
+  num_workers: 4
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
-        keep_ratio: true
-        transform_bbox: true
-        transform_mask: true
-        scale: $(input_size)
-    - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-      enable: false
+        size: $(input_size)
+        keep_aspect_ratio: true
+  augmentations_gpu:
+    - class_path: kornia.augmentation.RandomHorizontalFlip
       init_args:
-        brightness:
-          - 0.875
-          - 1.125
-        contrast:
-          - 0.5
-          - 1.5
-        saturation:
-          - 0.5
-          - 1.5
-        hue:
-          - -0.05
-          - 0.05
         p: 0.5
-    - class_path: otx.data.transform_libs.torchvision.RandomAffine
-      enable: false
-      init_args:
-        max_rotate_degree: 10.0
-        max_translate_ratio: 0.1
-        scaling_ratio_range:
-          - 0.5
-          - 1.5
-        max_shear_degree: 2.0
-        is_numpy_to_tvtensor: false
-    - class_path: otx.data.transform_libs.torchvision.Pad
-      enable: true
-      init_args:
-        pad_to_square: true
-        transform_mask: true
-    - class_path: otx.data.transform_libs.torchvision.RandomFlip
-      enable: true
-      init_args:
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.RandomVerticalFlip
-      enable: false
-      init_args:
-        p: 0.5
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-      enable: false
-      init_args:
-        kernel_size: 5
-        sigma:
-          - 0.1
-          - 2.0
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.ToDtype
-      init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-      enable: false
-      init_args:
-        mean: 0.0
-        sigma: 0.1
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.Normalize
-      init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+    - class_path: kornia.augmentation.Normalize
+      init_args:
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
 
 val_subset:
   subset_name: val
-  transform_lib_type: TORCHVISION
-  batch_size: 1
-  num_workers: 2
-  to_tv_image: true
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        keep_ratio: true
-        scale: $(input_size)
-        is_numpy_to_tvtensor: false
-    - class_path: otx.data.transform_libs.torchvision.Pad
-      init_args:
-        pad_to_square: true
-    - class_path: torchvision.transforms.v2.ToDtype
-      init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: torchvision.transforms.v2.Normalize
-      init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+  batch_size: 8
+  num_workers: 4
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
+      init_args:
+        size: $(input_size)
+        keep_aspect_ratio: true
+        resize_targets: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
+      init_args:
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
 
 test_subset:
   subset_name: test
-  transform_lib_type: TORCHVISION
-  batch_size: 1
-  num_workers: 2
-  to_tv_image: true
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        keep_ratio: true
-        scale: $(input_size)
-        is_numpy_to_tvtensor: false
-    - class_path: otx.data.transform_libs.torchvision.Pad
-      init_args:
-        pad_to_square: true
-    - class_path: torchvision.transforms.v2.ToDtype
-      init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: torchvision.transforms.v2.Normalize
-      init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+  batch_size: 8
+  num_workers: 4
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
+      init_args:
+        size: $(input_size)
+        keep_aspect_ratio: true
+        resize_targets: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
+      init_args:
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
diff --git a/library/src/otx/recipe/_base_/data/keypoint_detection.yaml b/library/src/otx/recipe/_base_/data/keypoint_detection.yaml
index a6faa905c95..d7a0a9062e1 100644
--- a/library/src/otx/recipe/_base_/data/keypoint_detection.yaml
+++ b/library/src/otx/recipe/_base_/data/keypoint_detection.yaml
@@ -3,103 +3,81 @@ unannotated_items_ratio: 0.0
 input_size:
   - 512
   - 512
+
 train_subset:
   subset_name: train
   batch_size: 32
   num_workers: 2
-  to_tv_image: true
-  transforms:
-    - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-      enable: false
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
-        brightness:
-          - 0.875
-          - 1.125
-        contrast:
-          - 0.5
-          - 1.5
-        saturation:
-          - 0.5
-          - 1.5
-        hue:
-          - -0.05
-          - 0.05
-        p: 0.5
-    - class_path: otx.data.transform_libs.torchvision.TopdownAffine
-      init_args:
-        input_size: $(input_size)
-        probability: 1.0
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-      enable: false
-      init_args:
-        kernel_size: 5
-        sigma:
-          - 0.1
-          - 2.0
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.ToDtype
-      init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-      enable: false
+        size: $(input_size)
+        keep_aspect_ratio: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.RandomAffine
       init_args:
-        mean: 0.0
-        sigma: 0.1
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.Normalize
+        degrees: 80.0
+        translate: [0.16, 0.16]
+        scale: [0.5, 1.5]
+        p: 1.0
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
+
 val_subset:
   subset_name: val
   batch_size: 32
   num_workers: 2
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        scale: $(input_size)
-        keep_ratio: true
-        transform_keypoints: false
-        is_numpy_to_tvtensor: false
-    - class_path: otx.data.transform_libs.torchvision.Pad
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
         size: $(input_size)
-        pad_val: 0
-    - class_path: torchvision.transforms.v2.ToDtype
-      init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: torchvision.transforms.v2.Normalize
+        keep_aspect_ratio: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
+
 test_subset:
   subset_name: test
   batch_size: 32
   num_workers: 2
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        scale: $(input_size)
-        keep_ratio: true
-        transform_keypoints: false
-        is_numpy_to_tvtensor: false
-    - class_path: otx.data.transform_libs.torchvision.Pad
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
         size: $(input_size)
-        pad_val: 0
-    - class_path: torchvision.transforms.v2.ToDtype
-      init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: torchvision.transforms.v2.Normalize
+        keep_aspect_ratio: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
diff --git a/library/src/otx/recipe/_base_/data/semantic_segmentation.yaml b/library/src/otx/recipe/_base_/data/semantic_segmentation.yaml
index de2f20f6e74..198c1eb9cf6 100644
--- a/library/src/otx/recipe/_base_/data/semantic_segmentation.yaml
+++ b/library/src/otx/recipe/_base_/data/semantic_segmentation.yaml
@@ -4,74 +4,35 @@ input_size:
   - 512
 unannotated_items_ratio: 0.0
 ignore_index: 255
+
 train_subset:
   subset_name: train
   batch_size: 8
   num_workers: 4
-  transform_lib_type: TORCHVISION
-  to_tv_image: true
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.RandomResizedCrop
+  augmentations_cpu:
+    - class_path: torchvision.transforms.v2.RandomResizedCrop
       init_args:
-        scale: $(input_size)
-        crop_ratio_range:
+        size: $(input_size)
+        scale:
           - 0.2
           - 1.0
-        aspect_ratio_range:
+        ratio:
           - 0.5
           - 2.0
-        transform_mask: true
-    - class_path: otx.data.transform_libs.torchvision.PhotoMetricDistortion
-      enable: true
-      init_args:
-        brightness_delta: 32
-        contrast:
-          - 0.5
-          - 1.5
-        saturation:
-          - 0.5
-          - 1.5
-        hue_delta: 18
-        probability: 0.5
-    - class_path: otx.data.transform_libs.torchvision.RandomAffine
-      enable: false
-      init_args:
-        max_rotate_degree: 10.0
-        max_translate_ratio: 0.1
-        scaling_ratio_range:
-          - 0.5
-          - 1.5
-        max_shear_degree: 2.0
-    - class_path: otx.data.transform_libs.torchvision.RandomFlip
-      enable: true
-      init_args:
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.RandomVerticalFlip
-      enable: false
+  augmentations_gpu:
+    - class_path: kornia.augmentation.RandomHorizontalFlip
       init_args:
         p: 0.5
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-      enable: false
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        kernel_size: 5
-        sigma:
-          - 0.1
-          - 2.0
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.ToDtype
-      init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-        scale: false
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-      enable: false
-      init_args:
-        mean: 0.0
-        sigma: 0.1
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.Normalize
-      init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
@@ -80,20 +41,23 @@ val_subset:
   subset_name: val
   batch_size: 8
   num_workers: 4
-  transform_lib_type: TORCHVISION
-  to_tv_image: true
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        scale: $(input_size)
-        transform_mask: true
-    - class_path: torchvision.transforms.v2.ToDtype
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: torchvision.transforms.v2.Normalize
+        size: $(input_size)
+        keep_aspect_ratio: false
+        resize_targets: true
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
@@ -102,20 +66,23 @@ test_subset:
   subset_name: test
   num_workers: 4
   batch_size: 8
-  transform_lib_type: TORCHVISION
-  to_tv_image: true
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        scale: $(input_size)
-        transform_mask: true
-    - class_path: torchvision.transforms.v2.ToDtype
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: torchvision.transforms.v2.Normalize
+        size: $(input_size)
+        keep_aspect_ratio: false
+        resize_targets: true
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
       init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
diff --git a/library/src/otx/recipe/_base_/data/semantic_segmentation_tile.yaml b/library/src/otx/recipe/_base_/data/semantic_segmentation_tile.yaml
index 3596e73898d..f24ffcafdc4 100644
--- a/library/src/otx/recipe/_base_/data/semantic_segmentation_tile.yaml
+++ b/library/src/otx/recipe/_base_/data/semantic_segmentation_tile.yaml
@@ -7,69 +7,39 @@ ignore_index: 255
 tile_config:
   enable_tiler: true
   enable_adaptive_tiling: true
+
 train_subset:
   subset_name: train
   batch_size: 8
   num_workers: 4
-  transform_lib_type: TORCHVISION
-  to_tv_image: true
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        keep_ratio: true
-        transform_mask: true
-        scale: $(input_size)
-    - class_path: otx.data.transform_libs.torchvision.PhotoMetricDistortion
-      enable: true
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
-        brightness_delta: 32
-        contrast:
-          - 0.5
-          - 1.5
-        saturation:
-          - 0.5
-          - 1.5
-        hue_delta: 18
-        probability: 0.5
-    - class_path: otx.data.transform_libs.torchvision.RandomAffine
-      enable: false
+        size: $(input_size)
+        keep_aspect_ratio: false
+        resize_targets: true
+  augmentations_gpu:
+    - class_path: kornia.augmentation.RandomHorizontalFlip
       init_args:
-        max_rotate_degree: 10.0
-        max_translate_ratio: 0.1
-        scaling_ratio_range:
-          - 0.5
-          - 1.5
-        max_shear_degree: 2.0
-    - class_path: otx.data.transform_libs.torchvision.RandomFlip
+        p: 0.5
+    - class_path: kornia.augmentation.ColorJiggle
       enable: true
       init_args:
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.RandomVerticalFlip
-      enable: false
-      init_args:
+        brightness: 0.125
+        contrast: 0.5
+        saturation: 0.5
+        hue: 0.05
         p: 0.5
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-      enable: false
-      init_args:
-        kernel_size: 5
-        sigma:
-          - 0.1
-          - 2.0
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.ToDtype
-      init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-        scale: false
-    - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-      enable: false
-      init_args:
-        mean: 0.0
-        sigma: 0.1
-        probability: 0.5
-    - class_path: torchvision.transforms.v2.Normalize
-      init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+    - class_path: kornia.augmentation.Normalize
+      init_args:
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
@@ -78,25 +48,22 @@ val_subset:
   subset_name: val
   batch_size: 8
   num_workers: 4
-  transform_lib_type: TORCHVISION
-  to_tv_image: true
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        scale: $(input_size)
-        keep_ratio: true
-        is_numpy_to_tvtensor: false
-    - class_path: otx.data.transform_libs.torchvision.Pad
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
         size: $(input_size)
-        pad_val: 255
-    - class_path: torchvision.transforms.v2.ToDtype
-      init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: torchvision.transforms.v2.Normalize
-      init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+        resize_targets: true
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
+      init_args:
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
@@ -105,25 +72,22 @@ test_subset:
   subset_name: test
   num_workers: 4
   batch_size: 8
-  transform_lib_type: TORCHVISION
-  to_tv_image: true
-  transforms:
-    - class_path: otx.data.transform_libs.torchvision.Resize
-      init_args:
-        scale: $(input_size)
-        keep_ratio: true
-        is_numpy_to_tvtensor: false
-    - class_path: otx.data.transform_libs.torchvision.Pad
+  augmentations_cpu:
+    - class_path: otx.data.augmentation.transforms.Resize
       init_args:
         size: $(input_size)
-        pad_val: 255
-    - class_path: torchvision.transforms.v2.ToDtype
-      init_args:
-        dtype: ${as_torch_dtype:torch.float32}
-    - class_path: torchvision.transforms.v2.Normalize
-      init_args:
-        mean: [123.675, 116.28, 103.53]
-        std: [58.395, 57.12, 57.375]
+        resize_targets: true
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
+      init_args:
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
diff --git a/library/src/otx/recipe/_base_/data/torchvision_base.yaml b/library/src/otx/recipe/_base_/data/torchvision_base.yaml
index 5dacda2bc68..0215626195e 100644
--- a/library/src/otx/recipe/_base_/data/torchvision_base.yaml
+++ b/library/src/otx/recipe/_base_/data/torchvision_base.yaml
@@ -1,37 +1,62 @@
 task: MULTI_CLASS_CLS
 unannotated_items_ratio: 0.0
+
 train_subset:
   subset_name: train
-  transform_lib_type: TORCHVISION
-  to_tv_image: true
-  transforms:
-    - class_path: torchvision.transforms.v2.ToImage
   batch_size: 1
   num_workers: 2
+  augmentations_cpu: []
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
+      init_args:
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
 
 val_subset:
   subset_name: val
-  transform_lib_type: TORCHVISION
-  to_tv_image: true
-  transforms:
-    - class_path: torchvision.transforms.v2.ToImage
   batch_size: 1
   num_workers: 2
+  augmentations_cpu: []
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
+      init_args:
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
 
 test_subset:
   subset_name: test
-  transform_lib_type: TORCHVISION
-  to_tv_image: true
-  transforms:
-    - class_path: torchvision.transforms.v2.ToImage
   batch_size: 1
   num_workers: 2
+  augmentations_cpu: []
+  augmentations_gpu:
+    - class_path: kornia.augmentation.Normalize
+      init_args:
+        mean:
+          - 0.485
+          - 0.456
+          - 0.406
+        std:
+          - 0.229
+          - 0.224
+          - 0.225
   sampler:
     class_path: torch.utils.data.RandomSampler
     init_args: null
diff --git a/library/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml b/library/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml
index dbc33d85e4e..9a05e4d85c7 100644
--- a/library/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml
+++ b/library/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml
@@ -46,74 +46,36 @@ callbacks:
 
 overrides:
   reset:
-    - data.train_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
   max_epochs: 90
 
   data:
     task: H_LABEL_CLS
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.EfficientNetRandomCrop
+      # CPU-stage: Size-dependent transforms only
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size)
-            crop_ratio_range:
+            size: $(input_size)
+            scale:
               - 0.08
               - 1.0
-            aspect_ratio_range:
+            ratio:
               - 0.75
               - 1.34
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+      # GPU-stage: Pixel-level augmentations (Kornia)
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml b/library/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml
index 2594e72a053..2ad4f469235 100644
--- a/library/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml
+++ b/library/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml
@@ -49,75 +49,37 @@ callbacks:
 
 overrides:
   reset:
-    - data.train_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
 
   max_epochs: 90
 
   data:
     task: H_LABEL_CLS
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.EfficientNetRandomCrop
+      # CPU-stage: Size-dependent transforms only
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size)
-            crop_ratio_range:
+            size: $(input_size)
+            scale:
               - 0.08
               - 1.0
-            aspect_ratio_range:
+            ratio:
               - 0.75
               - 1.34
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+      # GPU-stage: Pixel-level augmentations (Kornia)
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/classification/h_label_cls/efficientnet_v2_kl.yaml b/library/src/otx/recipe/classification/h_label_cls/efficientnet_v2_kl.yaml
index 56b292be2ea..7c027e4ad0e 100644
--- a/library/src/otx/recipe/classification/h_label_cls/efficientnet_v2_kl.yaml
+++ b/library/src/otx/recipe/classification/h_label_cls/efficientnet_v2_kl.yaml
@@ -50,75 +50,37 @@ callbacks:
 
 overrides:
   reset:
-    - data.train_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
 
   max_epochs: 90
 
   data:
     task: H_LABEL_CLS
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.EfficientNetRandomCrop
+      # CPU-stage: Size-dependent transforms only
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size)
-            crop_ratio_range:
+            size: $(input_size)
+            scale:
               - 0.08
               - 1.0
-            aspect_ratio_range:
+            ratio:
               - 0.75
               - 1.34
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+      # GPU-stage: Pixel-level augmentations (Kornia)
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/classification/h_label_cls/openvino_model.yaml b/library/src/otx/recipe/classification/h_label_cls/openvino_model.yaml
index 8d7fe78e4a4..fbc5e77ac4d 100644
--- a/library/src/otx/recipe/classification/h_label_cls/openvino_model.yaml
+++ b/library/src/otx/recipe/classification/h_label_cls/openvino_model.yaml
@@ -5,5 +5,6 @@ data: ../../_base_/data/torchvision_base.yaml
 overrides:
   data:
     task: H_LABEL_CLS
+    stack_images: false
     test_subset:
       batch_size: 128
diff --git a/library/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml b/library/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml
index 306a217e038..0b608c899e8 100644
--- a/library/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml
+++ b/library/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml
@@ -45,74 +45,36 @@ callbacks:
 
 overrides:
   reset:
-    - data.train_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
 
   max_epochs: 90
   data:
     task: H_LABEL_CLS
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.EfficientNetRandomCrop
+      # CPU-stage: Size-dependent transforms only
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size)
-            crop_ratio_range:
+            size: $(input_size)
+            scale:
               - 0.08
               - 1.0
-            aspect_ratio_range:
+            ratio:
               - 0.75
               - 1.34
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+      # GPU-stage: Pixel-level augmentations (Kornia)
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml b/library/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml
index d9a2138027d..81514908b0b 100644
--- a/library/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml
+++ b/library/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml
@@ -45,74 +45,36 @@ callbacks:
 
 overrides:
   reset:
-    - data.train_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
 
   max_epochs: 90
   data:
     task: H_LABEL_CLS
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.EfficientNetRandomCrop
+      # CPU-stage: Size-dependent transforms only
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size)
-            crop_ratio_range:
+            size: $(input_size)
+            scale:
               - 0.08
               - 1.0
-            aspect_ratio_range:
+            ratio:
               - 0.75
               - 1.34
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+      # GPU-stage: Pixel-level augmentations (Kornia)
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml b/library/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml
index 893ee13fd82..4e1706c2b23 100644
--- a/library/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml
+++ b/library/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml
@@ -46,73 +46,4 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
-  reset:
-    - data.train_subset.transforms
   max_epochs: 90
-
-  data:
-    train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.EfficientNetRandomCrop
-          init_args:
-            scale: $(input_size)
-            crop_ratio_range:
-              - 0.08
-              - 1.0
-            aspect_ratio_range:
-              - 0.75
-              - 1.34
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
-          init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
-          init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
diff --git a/library/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml b/library/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml
index 35bb8fdc15b..dc0c8359e7e 100644
--- a/library/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml
+++ b/library/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml
@@ -47,74 +47,36 @@ callbacks:
 
 overrides:
   reset:
-    - data.train_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
 
   max_epochs: 90
 
   data:
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.EfficientNetRandomCrop
+      # CPU-stage: Size-dependent transforms only
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size)
-            crop_ratio_range:
+            size: $(input_size)
+            scale:
               - 0.08
               - 1.0
-            aspect_ratio_range:
+            ratio:
               - 0.75
               - 1.34
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+      # GPU-stage: Pixel-level augmentations (Kornia)
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/classification/multi_class_cls/openvino_model.yaml b/library/src/otx/recipe/classification/multi_class_cls/openvino_model.yaml
index c30b349385e..6f2fa877b72 100644
--- a/library/src/otx/recipe/classification/multi_class_cls/openvino_model.yaml
+++ b/library/src/otx/recipe/classification/multi_class_cls/openvino_model.yaml
@@ -5,5 +5,6 @@ data: ../../_base_/data/torchvision_base.yaml
 overrides:
   data:
     task: MULTI_CLASS_CLS
+    stack_images: false
     test_subset:
       batch_size: 128
diff --git a/library/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml b/library/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml
index 605282fe9c5..9ac170d7247 100644
--- a/library/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml
+++ b/library/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml
@@ -46,72 +46,34 @@ callbacks:
 overrides:
   max_epochs: 90
   reset:
-    - data.train_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
 
   data:
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.EfficientNetRandomCrop
+      # CPU-stage: Size-dependent transforms only
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size)
-            crop_ratio_range:
+            size: $(input_size)
+            scale:
               - 0.08
               - 1.0
-            aspect_ratio_range:
+            ratio:
               - 0.75
               - 1.34
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+      # GPU-stage: Pixel-level augmentations (Kornia)
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml b/library/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml
index d91240183da..38dcb9325be 100644
--- a/library/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml
+++ b/library/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml
@@ -47,74 +47,36 @@ callbacks:
 
 overrides:
   reset:
-    - data.train_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
 
   max_epochs: 200
   data:
     task: MULTI_LABEL_CLS
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.EfficientNetRandomCrop
+      # CPU-stage: Size-dependent transforms only
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size)
-            crop_ratio_range:
+            size: $(input_size)
+            scale:
               - 0.08
               - 1.0
-            aspect_ratio_range:
+            ratio:
               - 0.75
               - 1.34
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+      # GPU-stage: Pixel-level augmentations (Kornia)
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml b/library/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml
index f47518b7d84..1beafcfdd36 100644
--- a/library/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml
+++ b/library/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml
@@ -47,74 +47,36 @@ callbacks:
 
 overrides:
   reset:
-    - data.train_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
 
   max_epochs: 200
   data:
     task: MULTI_LABEL_CLS
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.EfficientNetRandomCrop
+      # CPU-stage: Size-dependent transforms only
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size)
-            crop_ratio_range:
+            size: $(input_size)
+            scale:
               - 0.08
               - 1.0
-            aspect_ratio_range:
+            ratio:
               - 0.75
               - 1.34
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+      # GPU-stage: Pixel-level augmentations (Kornia)
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/classification/multi_label_cls/openvino_model.yaml b/library/src/otx/recipe/classification/multi_label_cls/openvino_model.yaml
index ec04a6a48fd..0628d6b8165 100644
--- a/library/src/otx/recipe/classification/multi_label_cls/openvino_model.yaml
+++ b/library/src/otx/recipe/classification/multi_label_cls/openvino_model.yaml
@@ -5,5 +5,6 @@ data: ../../_base_/data/torchvision_base.yaml
 overrides:
   data:
     task: MULTI_LABEL_CLS
+    stack_images: false
     test_subset:
       batch_size: 128
diff --git a/library/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml b/library/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml
index dcd6fa8434b..11021fc8f14 100644
--- a/library/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml
+++ b/library/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml
@@ -45,73 +45,35 @@ callbacks:
 
 overrides:
   reset:
-    - data.train_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
 
   data:
     task: MULTI_LABEL_CLS
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.EfficientNetRandomCrop
+      # CPU-stage: Size-dependent transforms only
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size)
-            crop_ratio_range:
+            size: $(input_size)
+            scale:
               - 0.08
               - 1.0
-            aspect_ratio_range:
+            ratio:
               - 0.75
               - 1.34
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+      # GPU-stage: Pixel-level augmentations (Kornia)
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml b/library/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml
index 7c0068f9907..1a723e6bfdf 100644
--- a/library/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml
+++ b/library/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml
@@ -44,73 +44,35 @@ callbacks:
 
 overrides:
   reset:
-    - data.train_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
 
   data:
     task: MULTI_LABEL_CLS
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.EfficientNetRandomCrop
+      # CPU-stage: Size-dependent transforms only
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size)
-            crop_ratio_range:
+            size: $(input_size)
+            scale:
               - 0.08
               - 1.0
-            aspect_ratio_range:
+            ratio:
               - 0.75
               - 1.34
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+      # GPU-stage: Pixel-level augmentations (Kornia)
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/detection/atss_mobilenetv2.yaml b/library/src/otx/recipe/detection/atss_mobilenetv2.yaml
index b24d4acb32e..1ebc4c87d32 100644
--- a/library/src/otx/recipe/detection/atss_mobilenetv2.yaml
+++ b/library/src/otx/recipe/detection/atss_mobilenetv2.yaml
@@ -4,7 +4,6 @@ model:
   init_args:
     model_name: atss_mobilenetv2
     label_info: 80
-
     optimizer:
       class_path: torch.optim.SGD
       init_args:
diff --git a/library/src/otx/recipe/detection/deim_dfine_l.yaml b/library/src/otx/recipe/detection/deim_dfine_l.yaml
index 05121a652d6..f8f42ee7cbd 100644
--- a/library/src/otx/recipe/detection/deim_dfine_l.yaml
+++ b/library/src/otx/recipe/detection/deim_dfine_l.yaml
@@ -56,144 +56,128 @@ callbacks:
       data_aug_switch:
         class_path: otx.backend.native.callbacks.aug_scheduler.DataAugSwitch
         init_args:
-          policy_epochs: [4, 23, 40]
+          policy_epochs: [4, 23]
+          input_size: [640, 640]
           policies:
             no_aug:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.Resize
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640]
-                    keep_ratio: false
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
-                  init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
                     mean: [0.0, 0.0, 0.0]
-                    std: [255.0, 255.0, 255.0]
+                    std: [1.0, 1.0, 1.0]
             light_aug:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
+              augmentations_cpu:
                 - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-                  init_args:
-                    probability: 0.8
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
                     mean: [0.0, 0.0, 0.0]
-                    std: [255.0, 255.0, 255.0]
+                    std: [1.0, 1.0, 1.0]
             strong_aug_1:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.CachedMosaic
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.CachedMosaic
                   init_args:
                     random_pop: false
                     max_cached_images: 20
                     img_scale: [640, 640]
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: false
                     max_cached_images: 10
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640] # (H, W)
-                    keep_ratio: false
-                    transform_bbox: true
-                - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
                     mean: [0.0, 0.0, 0.0]
-                    std: [255.0, 255.0, 255.0]
+                    std: [1.0, 1.0, 1.0]
             strong_aug_2:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
-                - class_path: torchvision.transforms.v2.RandomZoomOut # Can't be used when using CachedMosaic
+              augmentations_cpu:
+                - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop # Can't be used when using CachedMosaic
-                  init_args:
-                    probability: 0.8
+                    side_range: [1.0, 2.0]
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: false
                     max_cached_images: 10
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
+                  init_args:
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
                     mean: [0.0, 0.0, 0.0]
-                    std: [255.0, 255.0, 255.0]
+                    std: [1.0, 1.0, 1.0]
 data: ../_base_/data/torchvision_base.yaml
 overrides:
   callbacks:
@@ -214,52 +198,45 @@ overrides:
     train_subset:
       batch_size: 8
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            p: 0.5
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/detection/deim_dfine_m.yaml b/library/src/otx/recipe/detection/deim_dfine_m.yaml
index f0d4e0ce0ff..51237626e63 100644
--- a/library/src/otx/recipe/detection/deim_dfine_m.yaml
+++ b/library/src/otx/recipe/detection/deim_dfine_m.yaml
@@ -56,144 +56,128 @@ callbacks:
       data_aug_switch:
         class_path: otx.backend.native.callbacks.aug_scheduler.DataAugSwitch
         init_args:
-          policy_epochs: [4, 40, 72]
+          policy_epochs: [4, 40]
+          input_size: [640, 640]
           policies:
             no_aug:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.Resize
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640]
-                    keep_ratio: false
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
-                  init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
                     mean: [0.0, 0.0, 0.0]
-                    std: [255.0, 255.0, 255.0]
+                    std: [1.0, 1.0, 1.0]
             light_aug:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
+              augmentations_cpu:
                 - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-                  init_args:
-                    probability: 0.8
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
                     mean: [0.0, 0.0, 0.0]
-                    std: [255.0, 255.0, 255.0]
+                    std: [1.0, 1.0, 1.0]
             strong_aug_1:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.CachedMosaic
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.CachedMosaic
                   init_args:
                     random_pop: false
                     max_cached_images: 20
                     img_scale: [640, 640]
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: false
                     max_cached_images: 10
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640] # (H, W)
-                    keep_ratio: false
-                    transform_bbox: true
-                - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
                     mean: [0.0, 0.0, 0.0]
-                    std: [255.0, 255.0, 255.0]
+                    std: [1.0, 1.0, 1.0]
             strong_aug_2:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
-                - class_path: torchvision.transforms.v2.RandomZoomOut # Can't be used when using CachedMosaic
+              augmentations_cpu:
+                - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop # Can't be used when using CachedMosaic
-                  init_args:
-                    probability: 0.8
+                    side_range: [1.0, 2.0]
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: false
                     max_cached_images: 10
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
+                  init_args:
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
                     mean: [0.0, 0.0, 0.0]
-                    std: [255.0, 255.0, 255.0]
+                    std: [1.0, 1.0, 1.0]
 data: ../_base_/data/torchvision_base.yaml
 overrides:
   callbacks:
@@ -214,52 +198,46 @@ overrides:
     train_subset:
       batch_size: 8
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            p: 0.5
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/detection/deim_dfine_x.yaml b/library/src/otx/recipe/detection/deim_dfine_x.yaml
index c41d55a24ea..de31415d0cd 100644
--- a/library/src/otx/recipe/detection/deim_dfine_x.yaml
+++ b/library/src/otx/recipe/detection/deim_dfine_x.yaml
@@ -56,144 +56,128 @@ callbacks:
       data_aug_switch:
         class_path: otx.backend.native.callbacks.aug_scheduler.DataAugSwitch
         init_args:
-          policy_epochs: [4, 23, 40]
+          policy_epochs: [4, 23]
+          input_size: [640, 640]
           policies:
             no_aug:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.Resize
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640]
-                    keep_ratio: false
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
-                  init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
                     mean: [0.0, 0.0, 0.0]
-                    std: [255.0, 255.0, 255.0]
+                    std: [1.0, 1.0, 1.0]
             light_aug:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
+              augmentations_cpu:
                 - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-                  init_args:
-                    probability: 0.8
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
                     mean: [0.0, 0.0, 0.0]
-                    std: [255.0, 255.0, 255.0]
+                    std: [1.0, 1.0, 1.0]
             strong_aug_1:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.CachedMosaic
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.CachedMosaic
                   init_args:
                     random_pop: false
                     max_cached_images: 20
                     img_scale: [640, 640]
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: false
                     max_cached_images: 10
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640] # (H, W)
-                    keep_ratio: false
-                    transform_bbox: true
-                - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
                     mean: [0.0, 0.0, 0.0]
-                    std: [255.0, 255.0, 255.0]
+                    std: [1.0, 1.0, 1.0]
             strong_aug_2:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
-                - class_path: torchvision.transforms.v2.RandomZoomOut # Can't be used when using CachedMosaic
+              augmentations_cpu:
+                - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop # Can't be used when using CachedMosaic
-                  init_args:
-                    probability: 0.8
+                    side_range: [1.0, 2.0]
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: false
                     max_cached_images: 10
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
+                  init_args:
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
                     mean: [0.0, 0.0, 0.0]
-                    std: [255.0, 255.0, 255.0]
+                    std: [1.0, 1.0, 1.0]
 data: ../_base_/data/torchvision_base.yaml
 overrides:
   callbacks:
@@ -214,52 +198,45 @@ overrides:
     train_subset:
       batch_size: 8
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            p: 0.5
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/detection/deimv2_l.yaml b/library/src/otx/recipe/detection/deimv2_l.yaml
index 866da921f09..5883fadaaad 100644
--- a/library/src/otx/recipe/detection/deimv2_l.yaml
+++ b/library/src/otx/recipe/detection/deimv2_l.yaml
@@ -56,146 +56,128 @@ callbacks:
       data_aug_switch:
         class_path: otx.backend.native.callbacks.aug_scheduler.DataAugSwitch
         init_args:
-          policy_epochs: [4, 23, 40]
+          policy_epochs: [4, 23]
+          input_size: [640, 640]
           policies:
             no_aug:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.Resize
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
-                  init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
             light_aug:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
+              augmentations_cpu:
                 - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-                  init_args:
-                    probability: 0.8
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
             strong_aug_1:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.CachedMosaic
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.CachedMosaic
                   init_args:
                     random_pop: true
                     max_cached_images: 20
                     img_scale: [640, 640]
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: true
                     max_cached_images: 10
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640] # (H, W)
-                    keep_ratio: false
-                    transform_bbox: true
-                - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
             strong_aug_2:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
-                - class_path: torchvision.transforms.v2.RandomZoomOut # Can't be used when using CachedMosaic
+              augmentations_cpu:
+                - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
                     side_range: [1.0, 2.0]
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop # Can't be used when using CachedMosaic
-                  init_args:
-                    probability: 0.8
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: true
                     max_cached_images: 10
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
+                  init_args:
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
 data: ../_base_/data/torchvision_base.yaml
 overrides:
   callbacks:
@@ -217,52 +199,45 @@ overrides:
     train_subset:
       batch_size: 8
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
+            p: 0.5
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.280, 103.530]
-            std: [58.395, 57.120, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            mean: [123.675, 116.280, 103.530]
-            std: [58.395, 57.120, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            mean: [123.675, 116.280, 103.530]
-            std: [58.395, 57.120, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
diff --git a/library/src/otx/recipe/detection/deimv2_m.yaml b/library/src/otx/recipe/detection/deimv2_m.yaml
index 1a4ea6cdd70..bb7764fac71 100644
--- a/library/src/otx/recipe/detection/deimv2_m.yaml
+++ b/library/src/otx/recipe/detection/deimv2_m.yaml
@@ -56,145 +56,128 @@ callbacks:
       data_aug_switch:
         class_path: otx.backend.native.callbacks.aug_scheduler.DataAugSwitch
         init_args:
-          policy_epochs: [4, 40, 72]
+          policy_epochs: [4, 40]
+          input_size: [640, 640]
           policies:
             no_aug:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.Resize
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
-                  init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
             light_aug:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
+              augmentations_cpu:
                 - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-                  init_args:
-                    probability: 0.8
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
             strong_aug_1:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.CachedMosaic
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.CachedMosaic
                   init_args:
                     random_pop: true
                     max_cached_images: 20
                     img_scale: [640, 640]
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: true
                     max_cached_images: 10
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640] # (H, W)
-                    keep_ratio: false
-                    transform_bbox: true
-                - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
             strong_aug_2:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
-                - class_path: torchvision.transforms.v2.RandomZoomOut # Can't be used when using CachedMosaic
+              augmentations_cpu:
+                - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop # Can't be used when using CachedMosaic
-                  init_args:
-                    probability: 0.8
+                    side_range: [1.0, 2.0]
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: true
                     max_cached_images: 10
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
+                  init_args:
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
 data: ../_base_/data/torchvision_base.yaml
 overrides:
   callbacks:
@@ -215,52 +198,45 @@ overrides:
     train_subset:
       batch_size: 8
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
+            p: 0.5
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.280, 103.530]
-            std: [58.395, 57.120, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            mean: [123.675, 116.280, 103.530]
-            std: [58.395, 57.120, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            mean: [123.675, 116.280, 103.530]
-            std: [58.395, 57.120, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
diff --git a/library/src/otx/recipe/detection/deimv2_s.yaml b/library/src/otx/recipe/detection/deimv2_s.yaml
index 3a0fb4252b8..f191e3b28ad 100644
--- a/library/src/otx/recipe/detection/deimv2_s.yaml
+++ b/library/src/otx/recipe/detection/deimv2_s.yaml
@@ -56,145 +56,128 @@ callbacks:
       data_aug_switch:
         class_path: otx.backend.native.callbacks.aug_scheduler.DataAugSwitch
         init_args:
-          policy_epochs: [4, 40, 70]
+          policy_epochs: [4, 40]
+          input_size: [640, 640]
           policies:
             no_aug:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.Resize
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
-                  init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
             light_aug:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
+              augmentations_cpu:
                 - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-                  init_args:
-                    probability: 0.8
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
             strong_aug_1:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.CachedMosaic
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.CachedMosaic
                   init_args:
                     random_pop: true
                     max_cached_images: 20
                     img_scale: [640, 640]
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: true
                     max_cached_images: 10
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640] # (H, W)
-                    keep_ratio: false
-                    transform_bbox: true
-                - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
             strong_aug_2:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
-                - class_path: torchvision.transforms.v2.RandomZoomOut # Can't be used when using CachedMosaic
+              augmentations_cpu:
+                - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop # Can't be used when using CachedMosaic
-                  init_args:
-                    probability: 0.8
+                    side_range: [1.0, 2.0]
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: true
                     max_cached_images: 10
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
+                  init_args:
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
 data: ../_base_/data/torchvision_base.yaml
 overrides:
   callbacks:
@@ -215,52 +198,45 @@ overrides:
     train_subset:
       batch_size: 8
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
+            p: 0.5
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.280, 103.530]
-            std: [58.395, 57.120, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            mean: [123.675, 116.280, 103.530]
-            std: [58.395, 57.120, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            mean: [123.675, 116.280, 103.530]
-            std: [58.395, 57.120, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
diff --git a/library/src/otx/recipe/detection/deimv2_x.yaml b/library/src/otx/recipe/detection/deimv2_x.yaml
index fe13de08ea6..1861e25d33f 100644
--- a/library/src/otx/recipe/detection/deimv2_x.yaml
+++ b/library/src/otx/recipe/detection/deimv2_x.yaml
@@ -56,145 +56,128 @@ callbacks:
       data_aug_switch:
         class_path: otx.backend.native.callbacks.aug_scheduler.DataAugSwitch
         init_args:
-          policy_epochs: [0, 23, 40]
+          policy_epochs: [0, 23]
+          input_size: [640, 640]
           policies:
             no_aug:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.Resize
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
-                  init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
             light_aug:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
+              augmentations_cpu:
                 - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-                  init_args:
-                    probability: 0.8
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
             strong_aug_1:
-              to_tv_image: false
-              transforms:
-                - class_path: otx.data.transform_libs.torchvision.CachedMosaic
+              augmentations_cpu:
+                - class_path: otx.data.augmentation.transforms.CachedMosaic
                   init_args:
                     random_pop: true
                     max_cached_images: 20
                     img_scale: [640, 640]
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: true
                     max_cached_images: 10
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
                   init_args:
-                    scale: [640, 640] # (H, W)
-                    keep_ratio: false
-                    transform_bbox: true
-                - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    probability: 0.5
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
             strong_aug_2:
-              to_tv_image: true
-              transforms:
-                - class_path: torchvision.transforms.v2.ToImage
-                - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-                  init_args:
-                    p: 0.5
-                - class_path: torchvision.transforms.v2.RandomZoomOut # Can't be used when using CachedMosaic
+              augmentations_cpu:
+                - class_path: torchvision.transforms.v2.RandomZoomOut
                   init_args:
                     fill: 0
                     p: 0.5
-                - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop # Can't be used when using CachedMosaic
-                  init_args:
-                    probability: 0.8
+                    side_range: [1.0, 2.0]
+                - class_path: torchvision.transforms.v2.RandomIoUCrop
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.CachedMixUp
                   init_args:
-                    img_scale: [640, 640] # (H, W)
-                    ratio_range:
-                      - 1.0
-                      - 1.0
+                    img_scale: [640, 640]
+                    ratio_range: [1.0, 1.0]
                     probability: 0.5
                     random_pop: true
                     max_cached_images: 10
-                - class_path: otx.data.transform_libs.torchvision.RandomFlip
-                  init_args:
-                    probability: 0.5
                 - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
                   init_args:
-                    min_area: 1
-                - class_path: otx.data.transform_libs.torchvision.Resize
+                    min_size: 1
+                - class_path: otx.data.augmentation.transforms.Resize
+                  init_args:
+                    size: [640, 640]
+                    keep_aspect_ratio: false
+              augmentations_gpu:
+                - class_path: kornia.augmentation.RandomHorizontalFlip
                   init_args:
-                    scale: [640, 640]
-                    transform_bbox: true
-                    keep_ratio: false
-                - class_path: torchvision.transforms.v2.ToDtype
+                    p: 0.5
+                - class_path: kornia.augmentation.ColorJiggle
                   init_args:
-                    dtype: ${as_torch_dtype:torch.float32}
-                    scale: false
-                - class_path: torchvision.transforms.v2.Normalize
+                    brightness: 0.125
+                    contrast: 0.5
+                    saturation: 0.5
+                    hue: 0.05
+                    p: 0.5
+                - class_path: kornia.augmentation.Normalize
                   init_args:
-                    mean: [123.675, 116.280, 103.530]
-                    std: [58.395, 57.120, 57.375]
+                    mean: [0.485, 0.456, 0.406]
+                    std: [0.229, 0.224, 0.225]
 data: ../_base_/data/torchvision_base.yaml
 overrides:
   callbacks:
@@ -216,52 +199,45 @@ overrides:
     train_subset:
       batch_size: 8
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
+            p: 0.5
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.280, 103.530]
-            std: [58.395, 57.120, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            mean: [123.675, 116.280, 103.530]
-            std: [58.395, 57.120, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            mean: [123.675, 116.280, 103.530]
-            std: [58.395, 57.120, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
diff --git a/library/src/otx/recipe/detection/dfine_x.yaml b/library/src/otx/recipe/detection/dfine_x.yaml
index d62cb58ffc8..dbcbc2ea8d9 100644
--- a/library/src/otx/recipe/detection/dfine_x.yaml
+++ b/library/src/otx/recipe/detection/dfine_x.yaml
@@ -64,110 +64,54 @@ overrides:
     train_subset:
       batch_size: 8
       num_workers: 4
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
+        - class_path: otx.data.augmentation.transforms.RandomIoUCrop
           init_args:
-            probability: 0.8
+            trials: 50
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: otx.data.transform_libs.torchvision.Resize
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            transform_bbox: true
-            keep_ratio: false
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
-          init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
-          init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
             p: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
-          init_args:
-            min_size: 1
+            std: [1.0, 1.0, 1.0]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/detection/dfine_x_tile.yaml b/library/src/otx/recipe/detection/dfine_x_tile.yaml
index 0e4d6ea7867..62f0385679c 100644
--- a/library/src/otx/recipe/detection/dfine_x_tile.yaml
+++ b/library/src/otx/recipe/detection/dfine_x_tile.yaml
@@ -53,10 +53,6 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
-  reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
   data:
     input_size:
       - 640
@@ -65,113 +61,52 @@ overrides:
     train_subset:
       batch_size: 8
       num_workers: 4
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
-          init_args:
-            probability: 0.8
+        - class_path: torchvision.transforms.v2.RandomIoUCrop
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            transform_bbox: true
-            keep_ratio: false
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
-          init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
-          init_args:
-            min_size: 1
+            std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      to_tv_image: true
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
     test_subset:
       batch_size: 8
-      to_tv_image: true
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/detection/openvino_model.yaml b/library/src/otx/recipe/detection/openvino_model.yaml
index 75fa8facecb..2d11a199314 100644
--- a/library/src/otx/recipe/detection/openvino_model.yaml
+++ b/library/src/otx/recipe/detection/openvino_model.yaml
@@ -4,21 +4,19 @@ data: ../_base_/data/detection.yaml
 
 overrides:
   reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.val_subset.augmentations_cpu
+    - data.test_subset.augmentations_cpu
 
   data:
+    stack_images: false
     train_subset:
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
     val_subset:
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
     test_subset:
-      to_tv_image: true
       batch_size: 64
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
diff --git a/library/src/otx/recipe/detection/rfdetr_base.yaml b/library/src/otx/recipe/detection/rfdetr_base.yaml
index bd841895903..65d62d391ba 100644
--- a/library/src/otx/recipe/detection/rfdetr_base.yaml
+++ b/library/src/otx/recipe/detection/rfdetr_base.yaml
@@ -30,7 +30,7 @@ engine:
 
 callback_monitor: val/f1-score
 
-data: ../_base_/data/torchvision_base.yaml
+data: ../_base_/data/detection.yaml
 
 callbacks:
   - class_path: otx.backend.native.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling
@@ -68,63 +68,19 @@ overrides:
     task: DETECTION
     train_subset:
       batch_size: 8
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
-          init_args:
-            probability: 1.0
+            p: 0.5
+        - class_path: otx.data.augmentation.transforms.RandomIoUCrop
+        - class_path: torchvision.transforms.v2.ClampBoundingBoxes
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
-          enable: true
-          init_args:
-            p: 0.5
-        - class_path: torchvision.transforms.v2.Resize
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            keep_aspect_ratio: false
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
-
-    val_subset:
-      batch_size: 8
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
-          init_args:
-            size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-
-    test_subset:
-      batch_size: 8
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
-          init_args:
-            size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
diff --git a/library/src/otx/recipe/detection/rfdetr_large.yaml b/library/src/otx/recipe/detection/rfdetr_large.yaml
index 2596a18457d..78928904337 100644
--- a/library/src/otx/recipe/detection/rfdetr_large.yaml
+++ b/library/src/otx/recipe/detection/rfdetr_large.yaml
@@ -30,7 +30,7 @@ engine:
 
 callback_monitor: val/f1-score
 
-data: ../_base_/data/torchvision_base.yaml
+data: ../_base_/data/detection.yaml
 
 callbacks:
   - class_path: otx.backend.native.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling
@@ -68,63 +68,24 @@ overrides:
     task: DETECTION
     train_subset:
       batch_size: 4
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
-          init_args:
-            probability: 1.0
+            p: 0.5
+        - class_path: torchvision.transforms.v2.RandomIoUCrop
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
-          enable: true
-          init_args:
-            p: 0.5
-        - class_path: torchvision.transforms.v2.Resize
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            keep_aspect_ratio: false
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
-          init_args:
-            size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
 
     test_subset:
       batch_size: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
-          init_args:
-            size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
diff --git a/library/src/otx/recipe/detection/rfdetr_medium.yaml b/library/src/otx/recipe/detection/rfdetr_medium.yaml
index 37c2fb91e00..222c7a8fe34 100644
--- a/library/src/otx/recipe/detection/rfdetr_medium.yaml
+++ b/library/src/otx/recipe/detection/rfdetr_medium.yaml
@@ -30,7 +30,7 @@ engine:
 
 callback_monitor: val/f1-score
 
-data: ../_base_/data/torchvision_base.yaml
+data: ../_base_/data/detection.yaml
 
 callbacks:
   - class_path: otx.backend.native.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling
@@ -68,63 +68,18 @@ overrides:
     task: DETECTION
     train_subset:
       batch_size: 8
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
-          init_args:
-            probability: 1.0
+            p: 0.5
+        - class_path: torchvision.transforms.v2.RandomIoUCrop
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
-          enable: true
-          init_args:
-            p: 0.5
-        - class_path: torchvision.transforms.v2.Resize
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            keep_aspect_ratio: false
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
-
-    val_subset:
-      batch_size: 8
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
-          init_args:
-            size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-
-    test_subset:
-      batch_size: 8
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
-          init_args:
-            size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
diff --git a/library/src/otx/recipe/detection/rfdetr_nano.yaml b/library/src/otx/recipe/detection/rfdetr_nano.yaml
index 17112bcd684..ccfd0221609 100644
--- a/library/src/otx/recipe/detection/rfdetr_nano.yaml
+++ b/library/src/otx/recipe/detection/rfdetr_nano.yaml
@@ -30,7 +30,7 @@ engine:
 
 callback_monitor: val/f1-score
 
-data: ../_base_/data/torchvision_base.yaml
+data: ../_base_/data/detection.yaml
 
 callbacks:
   - class_path: otx.backend.native.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling
@@ -68,63 +68,18 @@ overrides:
     task: DETECTION
     train_subset:
       batch_size: 8
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
-          init_args:
-            probability: 1.0
+            p: 0.5
+        - class_path: torchvision.transforms.v2.RandomIoUCrop
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
-          enable: true
-          init_args:
-            p: 0.5
-        - class_path: torchvision.transforms.v2.Resize
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            keep_aspect_ratio: false
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
-
-    val_subset:
-      batch_size: 8
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
-          init_args:
-            size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-
-    test_subset:
-      batch_size: 8
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
-          init_args:
-            size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
diff --git a/library/src/otx/recipe/detection/rfdetr_small.yaml b/library/src/otx/recipe/detection/rfdetr_small.yaml
index 631d7f46316..c6cee5f97ec 100644
--- a/library/src/otx/recipe/detection/rfdetr_small.yaml
+++ b/library/src/otx/recipe/detection/rfdetr_small.yaml
@@ -30,7 +30,7 @@ engine:
 
 callback_monitor: val/f1-score
 
-data: ../_base_/data/torchvision_base.yaml
+data: ../_base_/data/detection.yaml
 
 callbacks:
   - class_path: otx.backend.native.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling
@@ -68,63 +68,18 @@ overrides:
     task: DETECTION
     train_subset:
       batch_size: 8
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
-          init_args:
-            probability: 1.0
+            p: 0.5
+        - class_path: torchvision.transforms.v2.RandomIoUCrop
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
-          enable: true
-          init_args:
-            p: 0.5
-        - class_path: torchvision.transforms.v2.Resize
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            keep_aspect_ratio: false
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
-
-    val_subset:
-      batch_size: 8
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
-          init_args:
-            size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-
-    test_subset:
-      batch_size: 8
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
-          init_args:
-            size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
diff --git a/library/src/otx/recipe/detection/rtdetr_101.yaml b/library/src/otx/recipe/detection/rtdetr_101.yaml
index a408d8e6286..8c48c29045a 100644
--- a/library/src/otx/recipe/detection/rtdetr_101.yaml
+++ b/library/src/otx/recipe/detection/rtdetr_101.yaml
@@ -61,100 +61,46 @@ overrides:
     task: DETECTION
     train_subset:
       batch_size: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop
-          enable: false
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-            transform_bbox: true
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
-          init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
-          init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
+            min_size: 1
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            std: [1.0, 1.0, 1.0]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/detection/rtdetr_101_tile.yaml b/library/src/otx/recipe/detection/rtdetr_101_tile.yaml
index d8310320a31..05424e4c0e1 100644
--- a/library/src/otx/recipe/detection/rtdetr_101_tile.yaml
+++ b/library/src/otx/recipe/detection/rtdetr_101_tile.yaml
@@ -61,29 +61,35 @@ overrides:
     task: DETECTION
     train_subset:
       batch_size: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-            transform_bbox: true
-        - class_path: torchvision.transforms.v2.ToDtype
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
+            p: 0.5
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.0, 0.0, 0.0]
+            std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu: []
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/detection/rtdetr_18.yaml b/library/src/otx/recipe/detection/rtdetr_18.yaml
index 4504787deb9..680d88f1e18 100644
--- a/library/src/otx/recipe/detection/rtdetr_18.yaml
+++ b/library/src/otx/recipe/detection/rtdetr_18.yaml
@@ -60,100 +60,46 @@ overrides:
     task: DETECTION
     train_subset:
       batch_size: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop
-          enable: false
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-            transform_bbox: true
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
-          init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
-          init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
+            min_size: 1
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            std: [1.0, 1.0, 1.0]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/detection/rtdetr_18_tile.yaml b/library/src/otx/recipe/detection/rtdetr_18_tile.yaml
index 850786dd8df..b5ab32147ec 100644
--- a/library/src/otx/recipe/detection/rtdetr_18_tile.yaml
+++ b/library/src/otx/recipe/detection/rtdetr_18_tile.yaml
@@ -60,29 +60,35 @@ overrides:
     task: DETECTION
     train_subset:
       batch_size: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-            transform_bbox: true
-        - class_path: torchvision.transforms.v2.ToDtype
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
+            p: 0.5
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.0, 0.0, 0.0]
+            std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu: []
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/detection/rtdetr_50.yaml b/library/src/otx/recipe/detection/rtdetr_50.yaml
index 82668560e7e..0c70991fdad 100644
--- a/library/src/otx/recipe/detection/rtdetr_50.yaml
+++ b/library/src/otx/recipe/detection/rtdetr_50.yaml
@@ -61,100 +61,48 @@ overrides:
     task: DETECTION
     train_subset:
       batch_size: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop
-          enable: false
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-            transform_bbox: true
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
-          init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
-          init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          enable: true
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
+            min_size: 1
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            std: [1.0, 1.0, 1.0]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: false
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/detection/rtdetr_50_tile.yaml b/library/src/otx/recipe/detection/rtdetr_50_tile.yaml
index 0c8a7ec8e0b..ef09873499e 100644
--- a/library/src/otx/recipe/detection/rtdetr_50_tile.yaml
+++ b/library/src/otx/recipe/detection/rtdetr_50_tile.yaml
@@ -61,29 +61,35 @@ overrides:
     task: DETECTION
     train_subset:
       batch_size: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-            transform_bbox: true
-        - class_path: torchvision.transforms.v2.ToDtype
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
+            p: 0.5
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.0, 0.0, 0.0]
+            std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu: []
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/detection/rtmdet_tiny.yaml b/library/src/otx/recipe/detection/rtmdet_tiny.yaml
index 8e9b356b123..3e24da66f5e 100644
--- a/library/src/otx/recipe/detection/rtmdet_tiny.yaml
+++ b/library/src/otx/recipe/detection/rtmdet_tiny.yaml
@@ -52,80 +52,38 @@ callbacks:
 
 overrides:
   reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
+    - data.val_subset.augmentations_cpu
+    - data.val_subset.augmentations_gpu
+    - data.test_subset.augmentations_cpu
+    - data.test_subset.augmentations_gpu
   gradient_clip_val: 35.0
 
   data:
     input_size:
       - 640
       - 640
+
     train_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.CachedMosaic
-          init_args:
-            img_scale: $(input_size)
-            max_cached_images: 20
-            random_pop: false
-        - class_path: otx.data.transform_libs.torchvision.RandomResize
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size) * 2
-            ratio_range:
+            size: $(input_size)
+            scale:
               - 0.5
               - 2.0
-            keep_ratio: true
-            transform_bbox: true
-        - class_path: otx.data.transform_libs.torchvision.RandomCrop
-          init_args:
-            crop_size: $(input_size)
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
-          init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
+            ratio:
               - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
-          init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
               - 2.0
-            probability: 0.5
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            antialias: true
+        - class_path: otx.data.augmentation.transforms.CachedMosaic
           init_args:
-            size: $(input_size)
-            pad_val: 114
-        - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+            img_scale: $(input_size)
+            max_cached_images: 20
+            random_pop: false
+        - class_path: otx.data.augmentation.transforms.CachedMixUp
           init_args:
             img_scale: $(input_size)
             ratio_range:
@@ -134,52 +92,66 @@ overrides:
             max_cached_images: 10
             random_pop: false
             probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
+            p: 0.5
+        - class_path: kornia.augmentation.ColorJiggle
           init_args:
-            mean: [103.53, 116.28, 123.675]
-            std: [57.375, 57.12, 58.395]
+            brightness: 0.125
+            contrast: 0.5
+            saturation: 0.5
+            hue: 0.05
+            p: 0.5
+        - class_path: kornia.augmentation.Normalize
+          init_args:
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
+
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: true
-        - class_path: otx.data.transform_libs.torchvision.Pad
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [103.53, 116.28, 123.675]
-            std: [57.375, 57.12, 58.395]
+            keep_aspect_ratio: true
+            pad_value: 114
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
+          init_args:
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
+
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: true
-        - class_path: otx.data.transform_libs.torchvision.Pad
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [103.53, 116.28, 123.675]
-            std: [57.375, 57.12, 58.395]
+            keep_aspect_ratio: true
+            pad_value: 114
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
+          init_args:
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/detection/rtmdet_tiny_tile.yaml b/library/src/otx/recipe/detection/rtmdet_tiny_tile.yaml
index eff70f2a364..c74da65a06e 100644
--- a/library/src/otx/recipe/detection/rtmdet_tiny_tile.yaml
+++ b/library/src/otx/recipe/detection/rtmdet_tiny_tile.yaml
@@ -51,6 +51,10 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
+  reset:
+    - data.train_subset.augmentations_cpu
+    - data.val_subset.augmentations_cpu
+    - data.test_subset.augmentations_cpu
   gradient_clip_val: 35.0
 
   data:
@@ -59,31 +63,48 @@ overrides:
       - 640
     train_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      num_workers: 4
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: true
+            pad_value: 114
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
+          init_args:
+            p: 0.5
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            mean: [103.53, 116.28, 123.675]
-            std: [57.375, 57.12, 58.395]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      num_workers: 4
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: true
+            pad_value: 114
+            resize_targets: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            mean: [103.53, 116.28, 123.675]
-            std: [57.375, 57.12, 58.395]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      num_workers: 4
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: true
+            pad_value: 114
+            resize_targets: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            mean: [103.53, 116.28, 123.675]
-            std: [57.375, 57.12, 58.395]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/library/src/otx/recipe/detection/ssd_mobilenetv2.yaml
index 998ed61b286..e5d9671ed8c 100644
--- a/library/src/otx/recipe/detection/ssd_mobilenetv2.yaml
+++ b/library/src/otx/recipe/detection/ssd_mobilenetv2.yaml
@@ -52,8 +52,6 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
-  reset:
-    - data.train_subset.transforms
   gradient_clip_val: 35.0
 
   data:
@@ -62,78 +60,49 @@ overrides:
       - 864
     train_subset:
       batch_size: 8
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomIoUCrop
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            transform_bbox: true
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
-          init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
+            min_size: 1
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
-            std: [255.0, 255.0, 255.0]
+            std: [1.0, 1.0, 1.0]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            scale: $(input_size)
+            mean: [0.0, 0.0, 0.0]
+            std: [1.0, 1.0, 1.0]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
+          init_args:
+            mean: [0.0, 0.0, 0.0]
+            std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/library/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml
index fcb0b1ba86d..b160b08c942 100644
--- a/library/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml
+++ b/library/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml
@@ -61,22 +61,45 @@ overrides:
 
     train_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
+          init_args:
+            p: 0.5
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.0, 0.0, 0.0]
+            std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.0, 0.0, 0.0]
+            std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            scale: $(input_size)
+            mean: [0.0, 0.0, 0.0]
+            std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/detection/yolox_l.yaml b/library/src/otx/recipe/detection/yolox_l.yaml
index 291a2583169..b984f49f8d5 100644
--- a/library/src/otx/recipe/detection/yolox_l.yaml
+++ b/library/src/otx/recipe/detection/yolox_l.yaml
@@ -52,11 +52,14 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
-  reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
   gradient_clip_val: 35.0
+  reset:
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
+    - data.val_subset.augmentations_cpu
+    - data.val_subset.augmentations_gpu
+    - data.test_subset.augmentations_cpu
+    - data.test_subset.augmentations_gpu
 
   data:
     input_size:
@@ -64,81 +67,44 @@ overrides:
       - 640
     train_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop
-          enable: false
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: true
-            transform_bbox: true
-        - class_path: otx.data.transform_libs.torchvision.CachedMosaic
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.CachedMosaic
           init_args:
             random_pop: false
             max_cached_images: 20
-            img_scale: $(input_size) # (H, W)
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
+            img_scale: $(input_size)
+        - class_path: otx.data.augmentation.transforms.CachedMixUp
           init_args:
-            scaling_ratio_range:
-              - 0.1
-              - 2.0
-            border: $(input_size) * -0.5
-        - class_path: otx.data.transform_libs.torchvision.CachedMixUp
-          init_args:
-            img_scale: $(input_size) # (H, W)
+            img_scale: $(input_size)
             ratio_range:
               - 1.0
               - 1.0
             probability: 0.5
             random_pop: false
             max_cached_images: 10
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
-          init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomAffine
+          init_args:
+            degrees: 10.0
+            translate: [0.1, 0.1]
+            scale: [0.1, 2.0]
+            shear: [-2.0, 2.0]
+            p: 1.0
+        - class_path: kornia.augmentation.ColorJiggle
+          init_args:
+            brightness: 0.125
+            contrast: 0.5
+            saturation: 0.5
+            hue: 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          init_args:
-            probability: 0.5
-            is_numpy_to_tvtensor: false
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_to_square: true
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
@@ -147,39 +113,27 @@ overrides:
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: true
-            is_numpy_to_tvtensor: false
-        - class_path: otx.data.transform_libs.torchvision.Pad
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            pad_to_square: true
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: true
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: true
-            is_numpy_to_tvtensor: false
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_to_square: true
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: true
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/detection/yolox_l_tile.yaml b/library/src/otx/recipe/detection/yolox_l_tile.yaml
index 3802d97e33a..bcc36834ca1 100644
--- a/library/src/otx/recipe/detection/yolox_l_tile.yaml
+++ b/library/src/otx/recipe/detection/yolox_l_tile.yaml
@@ -52,8 +52,6 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
-  reset:
-    - data.train_subset.transforms
   gradient_clip_val: 35.0
 
   data:
@@ -63,85 +61,50 @@ overrides:
     train_subset:
       num_workers: 4
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-            transform_bbox: true
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: torchvision.transforms.v2.ColorJitter
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
+            brightness: 0.125
+            contrast: 0.5
+            saturation: 0.5
+            hue: 0.05
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
     val_subset:
       num_workers: 4
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
     test_subset:
       num_workers: 4
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/detection/yolox_s.yaml b/library/src/otx/recipe/detection/yolox_s.yaml
index 253e2d8fbcd..c6485ddb638 100644
--- a/library/src/otx/recipe/detection/yolox_s.yaml
+++ b/library/src/otx/recipe/detection/yolox_s.yaml
@@ -51,10 +51,6 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
-  reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
   gradient_clip_val: 35.0
   data:
     input_size:
@@ -62,81 +58,44 @@ overrides:
       - 640
     train_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop
-          enable: false
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: true
-            transform_bbox: true
-        - class_path: otx.data.transform_libs.torchvision.CachedMosaic
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.CachedMosaic
           init_args:
             random_pop: false
             max_cached_images: 20
-            img_scale: $(input_size) # (H, W)
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          init_args:
-            scaling_ratio_range:
-              - 0.1
-              - 2.0
-            border: $(input_size) * -0.5
-        - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+            img_scale: $(input_size)
+        - class_path: otx.data.augmentation.transforms.CachedMixUp
           init_args:
-            img_scale: $(input_size) # (H, W)
+            img_scale: $(input_size)
             ratio_range:
               - 1.0
               - 1.0
             probability: 0.5
             random_pop: false
             max_cached_images: 10
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
-          init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomAffine
+          init_args:
+            degrees: 10.0
+            translate: [0.1, 0.1]
+            scale: [0.1, 2.0]
+            shear: [-2.0, 2.0]
+            p: 1.0
+        - class_path: kornia.augmentation.ColorJiggle
+          init_args:
+            brightness: 0.125
+            contrast: 0.5
+            saturation: 0.5
+            hue: 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          init_args:
-            probability: 0.5
-            is_numpy_to_tvtensor: false
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_to_square: true
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
@@ -145,39 +104,27 @@ overrides:
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: true
-            is_numpy_to_tvtensor: false
-        - class_path: otx.data.transform_libs.torchvision.Pad
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            pad_to_square: true
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: true
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: true
-            is_numpy_to_tvtensor: false
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_to_square: true
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: true
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/detection/yolox_s_tile.yaml b/library/src/otx/recipe/detection/yolox_s_tile.yaml
index 9a13304dd30..c4e23cb26d5 100644
--- a/library/src/otx/recipe/detection/yolox_s_tile.yaml
+++ b/library/src/otx/recipe/detection/yolox_s_tile.yaml
@@ -52,8 +52,6 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
-  reset:
-    - data.train_subset.transforms
   gradient_clip_val: 35.0
   data:
     input_size:
@@ -62,85 +60,50 @@ overrides:
     train_subset:
       num_workers: 4
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-            transform_bbox: true
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: torchvision.transforms.v2.ColorJitter
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
+            brightness: 0.125
+            contrast: 0.5
+            saturation: 0.5
+            hue: 0.05
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
     val_subset:
       num_workers: 4
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
     test_subset:
       num_workers: 4
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/detection/yolox_tiny.yaml b/library/src/otx/recipe/detection/yolox_tiny.yaml
index bead96ae181..32c0ed548b5 100644
--- a/library/src/otx/recipe/detection/yolox_tiny.yaml
+++ b/library/src/otx/recipe/detection/yolox_tiny.yaml
@@ -51,10 +51,6 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
-  reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
   gradient_clip_val: 35.0
   data:
     input_size:
@@ -62,121 +58,56 @@ overrides:
       - 416
     train_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop
-          enable: false
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size) * 1.538 # 640x640
-            keep_ratio: true
-            transform_bbox: true
-        - class_path: otx.data.transform_libs.torchvision.CachedMosaic
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.CachedMosaic
           init_args:
             random_pop: false
             max_cached_images: 20
-            img_scale: $(input_size) * 1.538 # 640x640
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
+            img_scale: $(input_size) * 1.538
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            border: $(input_size) * 1.538 * -0.5 # 640x640 * -0.5
-        - class_path: otx.data.transform_libs.torchvision.CachedMixUp
-          enable: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomAffine
           init_args:
-            img_scale: $(input_size) * 1.538 # 640x640
-            ratio_range:
-              - 1.0
-              - 1.0
-            probability: 0.5
-            random_pop: false
-            max_cached_images: 10
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+            degrees: 10.0
+            translate: [0.1, 0.1]
+            shear: [-2.0, 2.0]
+            p: 1.0
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-          enable: false
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          init_args:
-            probability: 0.5
-            is_numpy_to_tvtensor: false
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_to_square: true
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: true
-            is_numpy_to_tvtensor: false
-        - class_path: otx.data.transform_libs.torchvision.Pad
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            pad_to_square: true
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.ToDtype
+            size: $(input_size)
+            keep_aspect_ratio: true
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
     test_subset:
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: true
-            is_numpy_to_tvtensor: false
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_to_square: true
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: true
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
diff --git a/library/src/otx/recipe/detection/yolox_tiny_tile.yaml b/library/src/otx/recipe/detection/yolox_tiny_tile.yaml
index 2ba87b56a9f..3441ea73065 100644
--- a/library/src/otx/recipe/detection/yolox_tiny_tile.yaml
+++ b/library/src/otx/recipe/detection/yolox_tiny_tile.yaml
@@ -52,8 +52,6 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
-  reset:
-    - data.train_subset.transforms
   gradient_clip_val: 35.0
   data:
     input_size:
@@ -63,85 +61,44 @@ overrides:
     train_subset:
       num_workers: 4
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-            transform_bbox: true
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
-          init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
     val_subset:
       num_workers: 4
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
     test_subset:
       num_workers: 4
       batch_size: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/detection/yolox_x.yaml b/library/src/otx/recipe/detection/yolox_x.yaml
index b63647b45df..f0541b7449e 100644
--- a/library/src/otx/recipe/detection/yolox_x.yaml
+++ b/library/src/otx/recipe/detection/yolox_x.yaml
@@ -51,10 +51,6 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
-  reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
   gradient_clip_val: 35.0
   data:
     input_size:
@@ -62,78 +58,16 @@ overrides:
       - 640
     train_subset:
       batch_size: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.MinIoURandomCrop
-          enable: false
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: true
-            transform_bbox: true
-        - class_path: otx.data.transform_libs.torchvision.CachedMosaic
-          init_args:
-            random_pop: false
-            max_cached_images: 20
-            img_scale: $(input_size) # (H, W)
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          init_args:
-            border: $(input_size) * -0.5
-        - class_path: otx.data.transform_libs.torchvision.CachedMixUp
-          init_args:
-            img_scale: $(input_size) # (H, W)
-            ratio_range:
-              - 1.0
-              - 1.0
-            probability: 0.5
-            random_pop: false
-            max_cached_images: 10
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
-          init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          init_args:
-            probability: 0.5
-            is_numpy_to_tvtensor: false
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_to_square: true
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
@@ -142,39 +76,27 @@ overrides:
 
     val_subset:
       batch_size: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: true
-            is_numpy_to_tvtensor: false
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_to_square: true
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.ToDtype
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
+            size: $(input_size)
+            keep_aspect_ratio: true
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
     test_subset:
       batch_size: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-            keep_ratio: true
-            is_numpy_to_tvtensor: false
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_to_square: true
-            pad_val: 114
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: torchvision.transforms.v2.Normalize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: true
+            resize_targets: false
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/detection/yolox_x_tile.yaml b/library/src/otx/recipe/detection/yolox_x_tile.yaml
index d2a09cc0957..ddc5cd2c8d1 100644
--- a/library/src/otx/recipe/detection/yolox_x_tile.yaml
+++ b/library/src/otx/recipe/detection/yolox_x_tile.yaml
@@ -52,8 +52,6 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
-  reset:
-    - data.train_subset.transforms
   gradient_clip_val: 35.0
   data:
     input_size:
@@ -62,85 +60,50 @@ overrides:
     train_subset:
       num_workers: 4
       batch_size: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-            keep_ratio: false
-            transform_bbox: true
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
+        - class_path: torchvision.transforms.v2.ColorJitter
           init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
-              - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          init_args:
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
-          init_args:
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
-          init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
+            brightness: 0.125
+            contrast: 0.5
+            saturation: 0.5
+            hue: 0.05
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
     val_subset:
       num_workers: 4
       batch_size: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
     test_subset:
       num_workers: 4
       batch_size: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
+            size: $(input_size)
+            keep_aspect_ratio: false
+            resize_targets: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
             mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml b/library/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml
index 4915ae6ec96..a24a021cd95 100644
--- a/library/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml
+++ b/library/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml
@@ -57,31 +57,28 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Pad
+      augmentations_gpu:
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
-            size_divisor: 32
-        - class_path: torchvision.transforms.v2.Normalize
+            p: 0.5
+        - class_path: kornia.augmentation.Normalize
           init_args:
+            mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            size_divisor: 32
-        - class_path: torchvision.transforms.v2.Normalize
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
+            mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            size_divisor: 32
-        - class_path: torchvision.transforms.v2.Normalize
+      augmentations_gpu:
+        - class_path: kornia.augmentation.Normalize
           init_args:
+            mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml b/library/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml
index 15029bc5f6a..dfe81a779d1 100644
--- a/library/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml
+++ b/library/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml
@@ -63,46 +63,43 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            p: 0.5
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
+            mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_to_square: false
-            size_divisor: 32
+            size: $(input_size)
+            keep_aspect_ratio: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
+            mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            size: $(input_size)
+            keep_aspect_ratio: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
+            mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml b/library/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml
index cde9d5f068e..c7157ad96ce 100644
--- a/library/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml
+++ b/library/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml
@@ -58,19 +58,7 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            size_divisor: 32
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            size_divisor: 32
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            size_divisor: 32
diff --git a/library/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml b/library/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml
index a07f889503d..06e5e16ae38 100644
--- a/library/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml
+++ b/library/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml
@@ -65,34 +65,42 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            p: 0.5
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
+
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
+
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml b/library/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml
index 26c70996d9a..f7469ebdb52 100644
--- a/library/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml
+++ b/library/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml
@@ -65,34 +65,42 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            p: 0.5
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
+
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
+
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml b/library/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml
index 72f3d0e066b..355cbe1ad32 100644
--- a/library/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml
+++ b/library/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml
@@ -60,28 +60,22 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            size_divisor: 32
+            size: $(input_size)
+            keep_aspect_ratio: true
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            size_divisor: 32
+            size: $(input_size)
+            keep_aspect_ratio: true
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            size_divisor: 32
+            size: $(input_size)
+            keep_aspect_ratio: true
diff --git a/library/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml b/library/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml
index ab33332dbf3..24b50f16dfa 100644
--- a/library/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml
+++ b/library/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml
@@ -50,12 +50,10 @@ callbacks:
 
 overrides:
   max_epochs: 100
-
   data:
     input_size:
       - 1344
       - 1344
-
     tile_config:
       enable_tiler: true
       enable_adaptive_tiling: true
@@ -63,34 +61,42 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            p: 0.5
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
+
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
+
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/instance_segmentation/openvino_model.yaml b/library/src/otx/recipe/instance_segmentation/openvino_model.yaml
index 31046d1e714..82c47eb1ea0 100644
--- a/library/src/otx/recipe/instance_segmentation/openvino_model.yaml
+++ b/library/src/otx/recipe/instance_segmentation/openvino_model.yaml
@@ -15,17 +15,18 @@ callbacks:
 
 overrides:
   reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.val_subset.augmentations_cpu
+    - data.test_subset.augmentations_cpu
   data:
+    stack_images: false
     train_subset:
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
     val_subset:
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
     test_subset:
       batch_size: 64
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
diff --git a/library/src/otx/recipe/instance_segmentation/rfdetr_seg_2xlarge.yaml b/library/src/otx/recipe/instance_segmentation/rfdetr_seg_2xlarge.yaml
index 0d15843c8c4..e81741cbb8a 100644
--- a/library/src/otx/recipe/instance_segmentation/rfdetr_seg_2xlarge.yaml
+++ b/library/src/otx/recipe/instance_segmentation/rfdetr_seg_2xlarge.yaml
@@ -64,10 +64,6 @@ callbacks:
 overrides:
   max_epochs: 100
   gradient_clip_val: 0.1
-  reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
   data:
     task: INSTANCE_SEGMENTATION
     input_size:
@@ -75,70 +71,36 @@ overrides:
       - 768
     train_subset:
       batch_size: 2
-      num_workers: 4
-      subset_name: train
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
-          init_args:
-            probability: 1.0
+            p: 0.5
+        - class_path: torchvision.transforms.v2.RandomIoUCrop
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
-          enable: true
-          init_args:
-            p: 0.5
-        - class_path: torchvision.transforms.v2.Resize
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            keep_aspect_ratio: false
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 2
-      num_workers: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            keep_aspect_ratio: false
+            resize_targets: false
 
     test_subset:
       batch_size: 2
-      num_workers: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            keep_aspect_ratio: false
+            resize_targets: false
diff --git a/library/src/otx/recipe/instance_segmentation/rfdetr_seg_large.yaml b/library/src/otx/recipe/instance_segmentation/rfdetr_seg_large.yaml
index 6a9611d7532..2f2f81882f8 100644
--- a/library/src/otx/recipe/instance_segmentation/rfdetr_seg_large.yaml
+++ b/library/src/otx/recipe/instance_segmentation/rfdetr_seg_large.yaml
@@ -64,10 +64,6 @@ callbacks:
 overrides:
   max_epochs: 100
   gradient_clip_val: 0.1
-  reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
   data:
     task: INSTANCE_SEGMENTATION
     input_size:
@@ -75,70 +71,36 @@ overrides:
       - 504
     train_subset:
       batch_size: 4
-      num_workers: 4
-      subset_name: train
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
-          init_args:
-            probability: 1.0
+            p: 0.5
+        - class_path: torchvision.transforms.v2.RandomIoUCrop
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
-          enable: true
-          init_args:
-            p: 0.5
-        - class_path: torchvision.transforms.v2.Resize
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            keep_aspect_ratio: false
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 4
-      num_workers: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            keep_aspect_ratio: false
+            resize_targets: false
 
     test_subset:
       batch_size: 4
-      num_workers: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            keep_aspect_ratio: false
+            resize_targets: false
diff --git a/library/src/otx/recipe/instance_segmentation/rfdetr_seg_medium.yaml b/library/src/otx/recipe/instance_segmentation/rfdetr_seg_medium.yaml
index c472c017b08..69a88ebdcf2 100644
--- a/library/src/otx/recipe/instance_segmentation/rfdetr_seg_medium.yaml
+++ b/library/src/otx/recipe/instance_segmentation/rfdetr_seg_medium.yaml
@@ -64,10 +64,6 @@ callbacks:
 overrides:
   max_epochs: 100
   gradient_clip_val: 0.1
-  reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
   data:
     task: INSTANCE_SEGMENTATION
     input_size:
@@ -75,70 +71,36 @@ overrides:
       - 432
     train_subset:
       batch_size: 4
-      num_workers: 4
-      subset_name: train
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
-          init_args:
-            probability: 1.0
+            p: 0.5
+        - class_path: torchvision.transforms.v2.RandomIoUCrop
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
-          enable: true
-          init_args:
-            p: 0.5
-        - class_path: torchvision.transforms.v2.Resize
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            keep_aspect_ratio: false
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 4
-      num_workers: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            keep_aspect_ratio: false
+            resize_targets: false
 
     test_subset:
       batch_size: 4
-      num_workers: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            keep_aspect_ratio: false
+            resize_targets: false
diff --git a/library/src/otx/recipe/instance_segmentation/rfdetr_seg_nano.yaml b/library/src/otx/recipe/instance_segmentation/rfdetr_seg_nano.yaml
index 766d7fbff20..3207a7701c8 100644
--- a/library/src/otx/recipe/instance_segmentation/rfdetr_seg_nano.yaml
+++ b/library/src/otx/recipe/instance_segmentation/rfdetr_seg_nano.yaml
@@ -64,10 +64,6 @@ callbacks:
 overrides:
   max_epochs: 100
   gradient_clip_val: 0.1
-  reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
   data:
     task: INSTANCE_SEGMENTATION
     input_size:
@@ -75,70 +71,36 @@ overrides:
       - 312
     train_subset:
       batch_size: 4
-      num_workers: 4
-      subset_name: train
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
-          init_args:
-            probability: 1.0
+            p: 0.5
+        - class_path: torchvision.transforms.v2.RandomIoUCrop
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
-          enable: true
-          init_args:
-            p: 0.5
-        - class_path: torchvision.transforms.v2.Resize
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            keep_aspect_ratio: false
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 4
-      num_workers: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            keep_aspect_ratio: false
+            resize_targets: false
 
     test_subset:
       batch_size: 4
-      num_workers: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            keep_aspect_ratio: false
+            resize_targets: false
diff --git a/library/src/otx/recipe/instance_segmentation/rfdetr_seg_small.yaml b/library/src/otx/recipe/instance_segmentation/rfdetr_seg_small.yaml
index fdecda7a903..1341fd80432 100644
--- a/library/src/otx/recipe/instance_segmentation/rfdetr_seg_small.yaml
+++ b/library/src/otx/recipe/instance_segmentation/rfdetr_seg_small.yaml
@@ -64,10 +64,6 @@ callbacks:
 overrides:
   max_epochs: 100
   gradient_clip_val: 0.1
-  reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
   data:
     task: INSTANCE_SEGMENTATION
     input_size:
@@ -75,70 +71,36 @@ overrides:
       - 384
     train_subset:
       batch_size: 4
-      num_workers: 4
-      subset_name: train
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
-          init_args:
-            probability: 1.0
+            p: 0.5
+        - class_path: torchvision.transforms.v2.RandomIoUCrop
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
-          enable: true
-          init_args:
-            p: 0.5
-        - class_path: torchvision.transforms.v2.Resize
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            keep_aspect_ratio: false
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 4
-      num_workers: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            keep_aspect_ratio: false
+            resize_targets: false
 
     test_subset:
       batch_size: 4
-      num_workers: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            keep_aspect_ratio: false
+            resize_targets: false
diff --git a/library/src/otx/recipe/instance_segmentation/rfdetr_seg_xlarge.yaml b/library/src/otx/recipe/instance_segmentation/rfdetr_seg_xlarge.yaml
index c7e3831a0b6..4a6742cf5e0 100644
--- a/library/src/otx/recipe/instance_segmentation/rfdetr_seg_xlarge.yaml
+++ b/library/src/otx/recipe/instance_segmentation/rfdetr_seg_xlarge.yaml
@@ -64,10 +64,6 @@ callbacks:
 overrides:
   max_epochs: 100
   gradient_clip_val: 0.1
-  reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
   data:
     task: INSTANCE_SEGMENTATION
     input_size:
@@ -75,70 +71,37 @@ overrides:
       - 624
     train_subset:
       batch_size: 2
-      num_workers: 4
-      subset_name: train
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.RandomZoomOut
-          enable: true
           init_args:
             fill: 0
-        - class_path: otx.data.transform_libs.torchvision.RandomIoUCrop
-          enable: true
-          init_args:
-            probability: 1.0
+            p: 0.5
+        - class_path: torchvision.transforms.v2.RandomIoUCrop
         - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
           init_args:
             min_size: 1
-        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
-          enable: true
-          init_args:
-            p: 0.5
-        - class_path: torchvision.transforms.v2.Resize
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
-        - class_path: torchvision.transforms.v2.SanitizeBoundingBoxes
+            keep_aspect_ratio: false
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       batch_size: 2
-      num_workers: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            keep_aspect_ratio: false
+            resize_targets: false
 
     test_subset:
       batch_size: 2
-      num_workers: 4
-      to_tv_image: true
-      transforms:
-        - class_path: torchvision.transforms.v2.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-            scale: false
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            keep_aspect_ratio: false
+            resize_targets: false
+
diff --git a/library/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml b/library/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml
index ba72b9fc2d2..fc18dea3656 100644
--- a/library/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml
+++ b/library/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml
@@ -52,7 +52,13 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
-  reset: data.train_subset.transforms
+  reset:
+    - data.train_subset.augmentations_cpu
+    - data.train_subset.augmentations_gpu
+    - data.val_subset.augmentations_cpu
+    - data.val_subset.augmentations_gpu
+    - data.test_subset.augmentations_cpu
+    - data.test_subset.augmentations_gpu
   precision: 16
   max_epochs: 100
   gradient_clip_val: 35.0
@@ -65,59 +71,28 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.CachedMosaic
+      # ============ CPU Augmentations (before batching, in Dataset workers) ============
+      # Size-dependent ops that need to run before collate for stacking
+      augmentations_cpu:
+        # Mosaic: combines 4 images (pure torch, no numpy)
+        - class_path: otx.data.augmentation.transforms.CachedMosaic
           init_args:
             img_scale: $(input_size)
             max_cached_images: 20
             random_pop: false
-        - class_path: otx.data.transform_libs.torchvision.RandomResize
+        # Random resized crop using torchvision
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size) * 2
-            ratio_range:
+            size: $(input_size)
+            scale:
               - 0.5
               - 2.0
-            keep_ratio: true
-            transform_bbox: true
-            transform_mask: true
-        - class_path: otx.data.transform_libs.torchvision.RandomCrop
-          init_args:
-            crop_size: $(input_size)
-        - class_path: torchvision.transforms.v2.RandomPhotometricDistort
-          enable: false
-          init_args:
-            brightness:
-              - 0.875
-              - 1.125
-            contrast:
-              - 0.5
-              - 1.5
-            saturation:
-              - 0.5
-              - 1.5
-            hue:
-              - -0.05
-              - 0.05
-            p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.YOLOXHSVRandomAug
-        - class_path: otx.data.transform_libs.torchvision.RandomAffine
-          enable: false
-          init_args:
-            max_rotate_degree: 10.0
-            max_translate_ratio: 0.1
-            scaling_ratio_range:
+            ratio:
               - 0.5
-              - 1.5
-            max_shear_degree: 2.0
-        - class_path: otx.data.transform_libs.torchvision.RandomFlip
-          init_args:
-            probability: 0.5
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_to_square: true
-            pad_val: 114
-            transform_mask: true
-        - class_path: otx.data.transform_libs.torchvision.CachedMixUp
+              - 2.0
+            antialias: true
+        # MixUp: blends two images (pure torch)
+        - class_path: otx.data.augmentation.transforms.CachedMixUp
           init_args:
             img_scale: $(input_size)
             ratio_range:
@@ -126,46 +101,75 @@ overrides:
             max_cached_images: 10
             random_pop: false
             probability: 0.5
-        - class_path: torchvision.transforms.v2.RandomVerticalFlip
-          enable: false
+      # ============ GPU Augmentations (after batching, in Lightning Callback) ============
+      # Batch-level ops that run on GPU via Kornia
+      augmentations_gpu:
+        # Random horizontal flip (Kornia, GPU)
+        - class_path: kornia.augmentation.RandomHorizontalFlip
           init_args:
             p: 0.5
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianBlur
-          enable: false
-          init_args:
-            kernel_size: 5
-            sigma:
-              - 0.1
-              - 2.0
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.ToDtype
-          init_args:
-            dtype: ${as_torch_dtype:torch.float32}
-        - class_path: otx.data.transform_libs.torchvision.RandomGaussianNoise
-          enable: false
+        # Color jitter (Kornia, GPU)
+        - class_path: kornia.augmentation.ColorJiggle
           init_args:
-            mean: 0.0
-            sigma: 0.1
-            probability: 0.5
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            mean: [123.675, 116.28, 103.53]
-            std: [58.395, 57.12, 57.375]
+            brightness: 0.125
+            contrast: 0.5
+            saturation: 0.5
+            hue: 0.05
+            p: 0.5
+        # Normalize (Kornia, GPU) - ImageNet mean/std in RGB order, 0-1 scale (TODO: confirm against RTMDet's expected channel order)
+        - class_path: kornia.augmentation.Normalize
+          init_args:
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
+
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_val: 114
+      augmentations_cpu:
+        # Resize with aspect ratio preservation (pure torch)
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: true
+            pad_value: 114
+            resize_targets: false
+      augmentations_gpu:
+        # Normalize only (Kornia, GPU)
+        - class_path: kornia.augmentation.Normalize
+          init_args:
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
+
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            pad_val: 114
+      augmentations_cpu:
+        # Resize with aspect ratio preservation (pure torch)
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: true
+            pad_value: 114
+            resize_targets: false
+      augmentations_gpu:
+        # Normalize only (Kornia, GPU)
+        - class_path: kornia.augmentation.Normalize
+          init_args:
+            mean:
+              - 0.485
+              - 0.456
+              - 0.406
+            std:
+              - 0.229
+              - 0.224
+              - 0.225
diff --git a/library/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml b/library/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml
index 08a7b3d1a3a..009d8617194 100644
--- a/library/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml
+++ b/library/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml
@@ -52,6 +52,10 @@ callbacks:
       filename: "checkpoints/epoch_{epoch:03d}"
 
 overrides:
+  reset:
+    - data.train_subset.augmentations_cpu
+    - data.val_subset.augmentations_cpu
+    - data.test_subset.augmentations_cpu
   precision: 16
   max_epochs: 100
   gradient_clip_val: 35.0
@@ -68,34 +72,45 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: true
+            pad_value: 114
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            p: 0.5
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: true
+            pad_value: 114
+            resize_targets: false
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: true
+            pad_value: 114
+            resize_targets: false
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/keypoint_detection/openvino_model.yaml b/library/src/otx/recipe/keypoint_detection/openvino_model.yaml
index fc99a9a0089..bbd7c090972 100644
--- a/library/src/otx/recipe/keypoint_detection/openvino_model.yaml
+++ b/library/src/otx/recipe/keypoint_detection/openvino_model.yaml
@@ -4,18 +4,19 @@ data: ../_base_/data/keypoint_detection.yaml
 
 overrides:
   reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.val_subset.augmentations_cpu
+    - data.test_subset.augmentations_cpu
 
   data:
+    stack_images: false
     train_subset:
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
     val_subset:
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
     test_subset:
       batch_size: 64
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
diff --git a/library/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml b/library/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml
index 9dac68e6c12..ad647f3e46a 100644
--- a/library/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml
+++ b/library/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml
@@ -57,31 +57,10 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            size_divisor: 32
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            std: [1.0, 1.0, 1.0]
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            size_divisor: 32
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            std: [1.0, 1.0, 1.0]
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            size_divisor: 32
-        - class_path: torchvision.transforms.v2.Normalize
-          init_args:
-            std: [1.0, 1.0, 1.0]
diff --git a/library/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b_tile.yaml b/library/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b_tile.yaml
index ad2874a3575..f3a0953398f 100644
--- a/library/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b_tile.yaml
+++ b/library/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b_tile.yaml
@@ -53,7 +53,6 @@ callbacks:
 overrides:
   task: ROTATED_DETECTION
   max_epochs: 100
-
   data:
     input_size:
       - 512
@@ -65,46 +64,44 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            p: 0.5
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
+            mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
       sampler:
         class_path: otx.data.samplers.balanced_sampler.BalancedSampler
 
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            size: $(input_size)
+            keep_aspect_ratio: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
+            mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
+
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            size: $(input_size)
+            keep_aspect_ratio: false
         - class_path: torchvision.transforms.v2.Normalize
           init_args:
+            mean: [0.0, 0.0, 0.0]
             std: [1.0, 1.0, 1.0]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml b/library/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml
index e1cf9e05871..fad720995c2 100644
--- a/library/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml
+++ b/library/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml
@@ -59,19 +59,7 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            size_divisor: 32
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            size_divisor: 32
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Pad
-          init_args:
-            size_divisor: 32
diff --git a/library/src/otx/recipe/rotated_detection/maskrcnn_r50_tile.yaml b/library/src/otx/recipe/rotated_detection/maskrcnn_r50_tile.yaml
index 290e1a63125..a6cf0a1b6e8 100644
--- a/library/src/otx/recipe/rotated_detection/maskrcnn_r50_tile.yaml
+++ b/library/src/otx/recipe/rotated_detection/maskrcnn_r50_tile.yaml
@@ -64,34 +64,42 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            p: 0.5
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
+
     val_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
+
     test_subset:
       num_workers: 4
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
-            keep_ratio: false
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            pad_to_square: false
-            size_divisor: 32
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/rotated_detection/maskrcnn_r50_v2_tile.yaml b/library/src/otx/recipe/rotated_detection/maskrcnn_r50_v2_tile.yaml
index 3ba80cdb64d..2d94bebd22c 100644
--- a/library/src/otx/recipe/rotated_detection/maskrcnn_r50_v2_tile.yaml
+++ b/library/src/otx/recipe/rotated_detection/maskrcnn_r50_v2_tile.yaml
@@ -64,9 +64,42 @@ overrides:
     train_subset:
       batch_size: 4
       num_workers: 8
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.RandomHorizontalFlip
+          init_args:
+            p: 0.5
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
 
     val_subset:
       num_workers: 4
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
 
     test_subset:
       num_workers: 4
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
+          init_args:
+            size: $(input_size)
+            keep_aspect_ratio: false
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/rotated_detection/openvino_model.yaml b/library/src/otx/recipe/rotated_detection/openvino_model.yaml
index 4088ee37ab3..1732208fb42 100644
--- a/library/src/otx/recipe/rotated_detection/openvino_model.yaml
+++ b/library/src/otx/recipe/rotated_detection/openvino_model.yaml
@@ -18,22 +18,19 @@ data: ../_base_/data/instance_segmentation.yaml
 overrides:
   task: ROTATED_DETECTION
   reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.val_subset.augmentations_cpu
+    - data.test_subset.augmentations_cpu
 
   data:
     stack_images: false
     train_subset:
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
     val_subset:
-      to_tv_image: true
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
     test_subset:
-      to_tv_image: true
       batch_size: 24
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
diff --git a/library/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/library/src/otx/recipe/semantic_segmentation/dino_v2.yaml
index 785b956f789..eff4cc5fd68 100644
--- a/library/src/otx/recipe/semantic_segmentation/dino_v2.yaml
+++ b/library/src/otx/recipe/semantic_segmentation/dino_v2.yaml
@@ -54,26 +54,16 @@ overrides:
       - 518
       - 518
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.RandomResizedCrop
+      augmentations_cpu:
+        - class_path: torchvision.transforms.v2.RandomResizedCrop
           init_args:
-            scale: $(input_size)
-            crop_ratio_range:
+            size: $(input_size)
+            scale:
               - 0.08
               - 1.0
-            aspect_ratio_range:
+            ratio:
               - 0.75
               - 1.34
-    val_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-    test_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
   callbacks:
     - class_path: otx.backend.native.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
       init_args:
diff --git a/library/src/otx/recipe/semantic_segmentation/dino_v2_tile.yaml b/library/src/otx/recipe/semantic_segmentation/dino_v2_tile.yaml
index aec1f835706..64230e1afc6 100644
--- a/library/src/otx/recipe/semantic_segmentation/dino_v2_tile.yaml
+++ b/library/src/otx/recipe/semantic_segmentation/dino_v2_tile.yaml
@@ -50,38 +50,34 @@ overrides:
       - 518
       - 518
     train_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
-          init_args:
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-            pad_val:
-              img: 0
-              mask: 255
-            transform_mask: true
-    val_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
+    val_subset:
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-            pad_val:
-              img: 0
-              mask: 255
-            transform_mask: true
-    test_subset:
-      transforms:
-        - class_path: otx.data.transform_libs.torchvision.Resize
+            resize_targets: true
+        - class_path: torchvision.transforms.v2.Normalize
           init_args:
-            scale: $(input_size)
-        - class_path: otx.data.transform_libs.torchvision.Pad
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
+    test_subset:
+      augmentations_cpu:
+        - class_path: otx.data.augmentation.transforms.Resize
           init_args:
             size: $(input_size)
-            pad_val:
-              img: 0
-              mask: 255
-            transform_mask: true
+            resize_targets: true
+        - class_path: torchvision.transforms.v2.Normalize
+          init_args:
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+      augmentations_gpu: []
diff --git a/library/src/otx/recipe/semantic_segmentation/openvino_model.yaml b/library/src/otx/recipe/semantic_segmentation/openvino_model.yaml
index e105953fc89..40e05736343 100644
--- a/library/src/otx/recipe/semantic_segmentation/openvino_model.yaml
+++ b/library/src/otx/recipe/semantic_segmentation/openvino_model.yaml
@@ -3,23 +3,20 @@ callback_monitor: val/Dice
 data: ../_base_/data/semantic_segmentation.yaml
 overrides:
   reset:
-    - data.train_subset.transforms
-    - data.val_subset.transforms
-    - data.test_subset.transforms
+    - data.train_subset.augmentations_cpu
+    - data.val_subset.augmentations_cpu
+    - data.test_subset.augmentations_cpu
 
   data:
+    stack_images: false
     train_subset:
-      batch_size: 1
-      num_workers: 2
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
     val_subset:
-      batch_size: 1
-      num_workers: 2
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
     test_subset:
       batch_size: 64
       num_workers: 2
-      transforms:
+      augmentations_cpu:
         - class_path: torchvision.transforms.v2.ToImage
diff --git a/library/src/otx/tools/auto_configurator.py b/library/src/otx/tools/auto_configurator.py
index ea29b9e0aae..85af199e5f4 100644
--- a/library/src/otx/tools/auto_configurator.py
+++ b/library/src/otx/tools/auto_configurator.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024-2025 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Auto-Configurator class & util functions for OTX Auto-Configuration."""
@@ -209,7 +209,7 @@ def get_model(
         self,
         model_name: str | None = None,
         label_info: LabelInfoTypes | None = None,
-        data_input_params: DataInputParams | None = None,
+        data_input_params: DataInputParams | dict | None = None,
     ) -> OTXModel:
         """Retrieves the OTXModel instance based on the provided model name and meta information.
 
@@ -217,8 +217,8 @@ def get_model(
             model_name (str | None): The name of the model to retrieve. If None, the default model will be used.
             label_info (LabelInfoTypes | None): The meta information about the labels.
                 If provided, the number of classes will be updated in the model's configuration.
-            data_input_params (DataInputParams | None): The data input parameters containing the input size,
-                input mean and std.
+            data_input_params (DataInputParams | dict | None, optional): The data input parameters
+                containing the input size, input mean and std.
 
         Returns:
             OTXModel: The instantiated OTXModel instance.
@@ -246,7 +246,9 @@ def get_model(
         model_config = deepcopy(self.config["model"])
 
         if data_input_params is not None:
-            model_config["init_args"]["data_input_params"] = data_input_params.as_dict()
+            model_config["init_args"]["data_input_params"] = (
+                data_input_params if isinstance(data_input_params, dict) else data_input_params.as_dict()
+            )
         elif (datamodule := self.get_datamodule()) is not None:
             # get data_input_params info from datamodule
             if datamodule.input_size is None:
@@ -256,8 +258,8 @@ def get_model(
                 raise ValueError(msg)
             model_config["init_args"]["data_input_params"] = DataInputParams(
                 input_size=datamodule.input_size,
-                mean=datamodule.input_mean,
-                std=datamodule.input_std,
+                mean=datamodule.input_mean if datamodule.input_mean is not None else (0.0, 0.0, 0.0),
+                std=datamodule.input_std if datamodule.input_std is not None else (1.0, 1.0, 1.0),
             ).as_dict()
 
         model_cls = get_model_cls_from_config(Namespace(model_config))
@@ -332,14 +334,11 @@ def update_ov_subset_pipeline(
         ov_config = self._load_default_config(config_path=ov_config_path)["data"]
         subset_config = getattr(datamodule, f"{subset}_subset")
         subset_config.batch_size = ov_config[f"{subset}_subset"]["batch_size"]
-        subset_config.transform_lib_type = ov_config[f"{subset}_subset"]["transform_lib_type"]
-        subset_config.transforms = ov_config[f"{subset}_subset"]["transforms"]
-        subset_config.to_tv_image = ov_config[f"{subset}_subset"]["to_tv_image"]
+        subset_config.augmentations_cpu = ov_config[f"{subset}_subset"]["augmentations_cpu"]
         datamodule.tile_config.enable_tiler = False
         msg = (
             f"For OpenVINO IR models, Update the following {subset} \n"
-            f"\t transforms: {subset_config.transforms} \n"
-            f"\t transform_lib_type: {subset_config.transform_lib_type} \n"
+            f"\t augmentations_cpu: {subset_config.augmentations_cpu} \n"
             f"\t batch_size: {subset_config.batch_size} \n"
             "And the tiler is disabled."
         )
diff --git a/library/src/otx/tools/converter.py b/library/src/otx/tools/converter.py
index 7babe2f27db..371f4995713 100644
--- a/library/src/otx/tools/converter.py
+++ b/library/src/otx/tools/converter.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024-2025 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Converter for v1 config."""
@@ -9,7 +9,7 @@
 import logging
 from enum import Enum
 from pathlib import Path
-from typing import Any
+from typing import Any, ClassVar
 from warnings import warn
 
 import yaml
@@ -143,162 +143,337 @@ class ModelStatus(str, Enum):
 }
 
 
-def update_learning_rate(param_value: float | None, config: dict) -> None:
-    """Update learning rate in the config."""
-    if param_value is None:
-        logging.info("Learning rate is not provided, skipping update.")
-        return
-    optimizer = config["model"]["init_args"]["optimizer"]
-    if isinstance(optimizer, dict) and "init_args" in optimizer:
-        optimizer["init_args"]["lr"] = param_value
-    else:
-        warn("Warning: learning_rate is not updated", stacklevel=1)
+class TransformsUpdater:
+    """Handles augmentation updates for the new CPU/GPU augmentation pipeline.
 
+    Maps Geti augmentation names to OTX/kornia/torchvision class paths and the
+    pipeline stage (cpu or gpu). Parameters come directly from the Geti model template;
+    only a few Geti param names need renaming to match kornia's API.
 
-def update_num_iters(param_value: int | None, config: dict) -> None:
-    """Update max_epochs in the config."""
-    if param_value is None:
-        logging.info("Max epochs is not provided, skipping update.")
-        return
-    config["max_epochs"] = param_value
+    Example Geti model template augmentation section::
 
+        random_affine:
+            enable: false
+            max_rotate_degree: 10.0
+            max_translate_ratio: 0.1
+            scaling_ratio_range: [0.5, 1.5]
+            max_shear_degree: 2.0
+        color_jitter:
+            enable: false
+            brightness: [0.875, 1.125]
+            probability: 0.5
+    """
 
-def update_batch_size(param_value: int | None, config: dict) -> None:
-    """Update batch size in the config."""
-    if param_value is None:
-        logging.info("Batch size is not provided, skipping update.")
-        return
-    config["data"]["train_subset"]["batch_size"] = param_value
-    config["data"]["val_subset"]["batch_size"] = param_value
+    # Geti name -> {"class_paths": [...], "stage": "cpu" | "gpu"}
+    # class_paths is a list to match multiple possible implementations in configs
+    AUGMENTATION_REGISTRY: ClassVar[dict[str, dict]] = {
+        "random_resize_crop": {
+            "class_paths": [
+                "torchvision.transforms.v2.RandomResizedCrop",
+            ],
+            "stage": "cpu",
+        },
+        "random_affine": {
+            "class_paths": ["kornia.augmentation.RandomAffine"],
+            "stage": "gpu",
+        },
+        "random_horizontal_flip": {
+            "class_paths": ["kornia.augmentation.RandomHorizontalFlip"],
+            "stage": "gpu",
+        },
+        "random_vertical_flip": {
+            "class_paths": ["kornia.augmentation.RandomVerticalFlip"],
+            "stage": "gpu",
+        },
+        "gaussian_blur": {
+            "class_paths": ["kornia.augmentation.RandomGaussianBlur"],
+            "stage": "gpu",
+        },
+        "gaussian_noise": {
+            "class_paths": ["kornia.augmentation.RandomGaussianNoise"],
+            "stage": "gpu",
+        },
+        "color_jitter": {
+            "class_paths": ["kornia.augmentation.ColorJiggle"],
+            "stage": "gpu",
+        },
+        "iou_random_crop": {
+            "class_paths": [
+                "torchvision.transforms.v2.RandomIoUCrop",
+            ],
+            "stage": "cpu",
+        },
+        "random_zoom_out": {
+            "class_paths": ["torchvision.transforms.v2.RandomZoomOut"],
+            "stage": "cpu",
+        },
+        "mixup": {
+            "class_paths": ["otx.data.augmentation.transforms.CachedMixUp"],
+            "stage": "cpu",
+        },
+        "mosaic": {
+            "class_paths": ["otx.data.augmentation.transforms.CachedMosaic"],
+            "stage": "cpu",
+        },
+    }
 
+    # Geti param name -> kornia/torchvision param name (applied to every augmentation alike)
+    PARAM_RENAME: ClassVar[dict[str, str]] = {
+        "probability": "p",
+        "sigma": "std",  # NOTE(review): also hits gaussian_blur; kornia RandomGaussianBlur takes `sigma` - confirm
+        "max_rotate_degree": "degrees",
+        "scaling_ratio_range": "scale",
+        "crop_ratio_range": "scale",
+        "aspect_ratio_range": "ratio",
+        "max_translate_ratio": "translate",
+        "max_shear_degree": "shear",
+    }
 
-def update_early_stopping(early_stopping_cfg: dict | None, config: dict) -> None:
-    """Update early stopping parameters in the config."""
-    if early_stopping_cfg is None:
-        logging.info("Early stopping parameters are not provided, skipping update.")
-        return
+    @classmethod
+    def update(cls, augmentation_params: dict, config: dict) -> None:
+        """Update ``config["data"]["train_subset"]`` augmentations from a Geti model template.
 
-    enable = early_stopping_cfg["enable"]
-    patience = early_stopping_cfg["patience"]
+        For each augmentation in augmentation_params:
+        - If enable=True and aug exists in config -> update its parameters
+        - If enable=True and aug does NOT exist -> add it with template params
+        - If enable=False and aug exists -> remove it
+        - If enable=False and aug does NOT exist -> no-op
 
-    idx = GetiConfigConverter.get_callback_idx(
-        config["callbacks"],
-        "otx.backend.native.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup",
-    )
-    if not enable and idx > -1:
-        config["callbacks"].pop(idx)
-        return
+        Special case: disabling random_resize_crop replaces it with plain Resize.
+
+        Args:
+            augmentation_params: Dict mapping Geti aug names to parameter dicts ("enable" defaults to True).
+            config: The full OTX config dictionary; its train_subset stage lists are edited in place.
+        """
+        if not augmentation_params:
+            return
+
+        tiling = config["data"].get("tile_config", {}).get("enable_tiler", False)
+        train_subset = config["data"]["train_subset"]
+
+        for aug_name, aug_value in augmentation_params.items():
+            if aug_name not in cls.AUGMENTATION_REGISTRY:
+                if tiling:  # unknown names are skipped only when the tiler is enabled; otherwise ValueError
+                    logging.info("Augmentation '%s' is not applicable in Tiling pipeline", aug_name)
+                    continue
+                msg = f"Unknown augmentation: '{aug_name}'. Available: {list(cls.AUGMENTATION_REGISTRY.keys())}"
+                raise ValueError(msg)
+
+            registry_entry = cls.AUGMENTATION_REGISTRY[aug_name]
+            # Shallow copy: popping "enable" below must not mutate the caller's template dict
+            params = dict(aug_value)
+            enable = params.pop("enable", True)
+            stage_key = f"augmentations_{registry_entry['stage']}"
+
+            # Ensure the stage list exists
+            if stage_key not in train_subset:
+                train_subset[stage_key] = []
+
+            aug_list = train_subset[stage_key]
+            existing_idx = cls._find_augmentation(aug_list, registry_entry["class_paths"])
+
+            if enable:
+                init_args = cls._remap_params(params)
+
+                if existing_idx is not None:
+                    # Merge template params over the existing init_args (template values win)
+                    aug_config = aug_list[existing_idx]
+                    if "init_args" not in aug_config:
+                        aug_config["init_args"] = {}
+                    aug_config["init_args"].update(init_args)
+                    aug_config.pop("enable", None)
+                else:
+                    # Not configured yet: add it using the first registered class path
+                    new_aug: dict[str, Any] = {"class_path": registry_entry["class_paths"][0]}
+                    if init_args:
+                        new_aug["init_args"] = init_args
+                    insert_idx = cls._get_insert_position(aug_list, registry_entry["stage"])
+                    aug_list.insert(insert_idx, new_aug)
+            elif existing_idx is not None:
+                if aug_name == "random_resize_crop":
+                    # Replace crop with simple Resize to keep the pipeline valid
+                    aug_list[existing_idx] = {
+                        "class_path": "otx.data.augmentation.transforms.Resize",
+                        "init_args": {"size": "$(input_size)"},
+                    }
+                else:
+                    aug_list.pop(existing_idx)
+
+    @classmethod
+    def _remap_params(cls, params: dict) -> dict:
+        """Build init_args from Geti params: rename keys, drop None values, expand single values.
+
+        1. Rename keys via PARAM_RENAME (probability->p, max_translate_ratio->translate, etc.)
+        2. Expand non-list translate to [v, v], non-list shear to [-v, v], int kernel_size to [v, v].
+        """
+        # Step 1: rename keys; params whose value is None are omitted from the result
+        init_args: dict[str, Any] = {}
+        for key, value in params.items():
+            if value is None:
+                continue
+            init_args[cls.PARAM_RENAME.get(key, key)] = value
+
+        # Step 2: expand to pairs (NOTE(review): only `list` counts as a pair; confirm no tuples arrive)
+        if "translate" in init_args and not isinstance(init_args["translate"], list):
+            v = init_args["translate"]
+            init_args["translate"] = [v, v]
+        if "shear" in init_args and not isinstance(init_args["shear"], list):
+            v = init_args["shear"]
+            init_args["shear"] = [-v, v]
+        if "kernel_size" in init_args and isinstance(init_args["kernel_size"], int):
+            v = init_args["kernel_size"]
+            init_args["kernel_size"] = [v, v]
+
+        return init_args
+
+    @staticmethod
+    def _find_augmentation(aug_list: list[dict], class_paths: list[str]) -> int | None:
+        """Locate the first entry of ``aug_list`` whose ``class_path`` is in ``class_paths`` (None if absent)."""
+        hits = (
+            position for position, entry in enumerate(aug_list) if entry.get("class_path") in class_paths
+        )
+        return next(hits, None)
+
+    @staticmethod
+    def _get_insert_position(aug_list: list[dict], stage: str) -> int:
+        """Pick the index at which a newly enabled augmentation is inserted.
+
+        GPU stage: the slot of the first entry whose class path contains "Normalize".
+        CPU stage: the slot of the first "Resize" entry that is not a "RandomResizedCrop".
+        No such entry, or any other stage: ``len(aug_list)``, i.e. append.
+        """
+        if stage == "gpu":
+            anchors = (pos for pos, entry in enumerate(aug_list) if "Normalize" in entry.get("class_path", ""))
+        elif stage == "cpu":
+            indexed_paths = ((pos, entry.get("class_path", "")) for pos, entry in enumerate(aug_list))
+            anchors = (pos for pos, path in indexed_paths if "Resize" in path and "RandomResizedCrop" not in path)
+        else:
+            anchors = iter(())
+        # The first anchor wins; without one the augmentation goes to the end of the list.
+        return next(anchors, len(aug_list))
+
+    @staticmethod
+    def update_tiling(tiling_dict: dict | None, config: dict) -> None:
+        """Write the tiling settings into ``config["data"]["tile_config"]``.
+
+        Args:
+            tiling_dict: Mapping with ``enable`` plus, when enabled, ``adaptive_tiling``,
+                ``tile_size`` and ``tile_overlap``; ``None`` leaves the config untouched.
+            config: The full OTX config dictionary, modified in place.
+        """
+        if tiling_dict is None:
+            logging.info("Tiling parameters are not provided, skipping update.")
+            return
+        tiler_enabled = tiling_dict["enable"]
+        tile_config = config["data"]["tile_config"]
+        tile_config["enable_tiler"] = tiler_enabled
+        if not tiler_enabled:
+            return
+        tile_config["enable_adaptive_tiling"] = tiling_dict["adaptive_tiling"]
+        tile_config["tile_size"] = (tiling_dict["tile_size"],) * 2
+        tile_config["overlap"] = tiling_dict["tile_overlap"]
 
-    config["callbacks"][idx]["init_args"]["patience"] = patience
 
+class HyperparametersUpdater:
+    """Handles training hyperparameter updates (learning rate, batch size, etc.)."""
 
-def update_tiling(tiling_dict: dict | None, config: dict) -> None:
-    """Update tiling parameters in the config."""
-    if tiling_dict is None:
-        logging.info("Tiling parameters are not provided, skipping update.")
-        return
+    @staticmethod
+    def update(hyperparameters: dict, config: dict) -> None:
+        """Apply every recognised hyperparameter to the config; other keys are ignored.
+
+        Recognised keys and the values they carry:
+        - learning_rate: float
+        - batch_size: int
+        - max_epochs: int
+        - early_stopping: dict with keys {enable, patience}
+        - input_size: tuple (height, width)
+
+        Args:
+            hyperparameters: Dict of hyperparameter updates.
+            config: The full OTX config dictionary, modified in place.
+        """
+        # Key -> setter; every setter logs and skips when it is handed None.
+        appliers = {
+            "learning_rate": HyperparametersUpdater._update_learning_rate,
+            "batch_size": HyperparametersUpdater._update_batch_size,
+            "max_epochs": HyperparametersUpdater._update_max_epochs,
+            "early_stopping": HyperparametersUpdater._update_early_stopping,
+            "input_size": HyperparametersUpdater._update_input_size,
+        }
+        for name, value in hyperparameters.items():
+            if name in appliers:
+                appliers[name](value, config)
+
+    @staticmethod
+    def _update_learning_rate(param_value: float | None, config: dict) -> None:
+        """Set ``lr`` in the optimizer's ``init_args``; warn when the optimizer config has none."""
+        if param_value is None:
+            logging.info("Learning rate is not provided, skipping update.")
+            return
+        optimizer_cfg = config["model"]["init_args"]["optimizer"]
+        if not (isinstance(optimizer_cfg, dict) and "init_args" in optimizer_cfg):
+            warn("Warning: learning_rate is not updated", stacklevel=1)
+            return
+        optimizer_cfg["init_args"]["lr"] = param_value
 
-    config["data"]["tile_config"]["enable_tiler"] = tiling_dict["enable"]
-    if tiling_dict["enable"]:
-        config["data"]["tile_config"]["enable_adaptive_tiling"] = tiling_dict["adaptive_tiling"]
-        config["data"]["tile_config"]["tile_size"] = (
-            tiling_dict["tile_size"],
-            tiling_dict["tile_size"],
+    @staticmethod
+    def _update_batch_size(param_value: int | None, config: dict) -> None:
+        """Apply one batch size to both the training and the validation subset."""
+        if param_value is None:
+            logging.info("Batch size is not provided, skipping update.")
+            return
+        for subset_key in ("train_subset", "val_subset"):
+            config["data"][subset_key]["batch_size"] = param_value
+
+    @staticmethod
+    def _update_max_epochs(param_value: int | None, config: dict) -> None:
+        """Store the epoch budget under the top-level ``max_epochs`` key."""
+        if param_value is not None:
+            config["max_epochs"] = param_value
+            return
+        logging.info("Max epochs is not provided, skipping update.")
+
+    @staticmethod
+    def _update_early_stopping(early_stopping_cfg: dict | None, config: dict) -> None:
+        """Remove the early-stopping callback when disabled, otherwise update its patience."""
+        if early_stopping_cfg is None:
+            logging.info("Early stopping parameters are not provided, skipping update.")
+            return
+
+        enable = early_stopping_cfg["enable"]
+        patience = early_stopping_cfg.get("patience")
+        # idx is -1 when the recipe lacks this callback; never subscript config["callbacks"] with it.
+        idx = GetiConfigConverter.get_callback_idx(
+            config["callbacks"],
+            "otx.backend.native.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup",
         )
-        config["data"]["tile_config"]["overlap"] = tiling_dict["tile_overlap"]
+        if not enable and idx > -1:
+            config["callbacks"].pop(idx)
+            return
 
+        if idx > -1 and patience is not None:
+            config["callbacks"][idx]["init_args"]["patience"] = patience
 
-def update_input_size(height: int | None, width: int | None, config: dict) -> None:
-    """Update input size in the config."""
-    if height is None or width is None:
-        logging.info("Input size is not provided, skipping update.")
-        return
-    config["data"]["input_size"] = (height, width)
+    @staticmethod
+    def _update_input_size(size_value: tuple[int, int] | None, config: dict) -> None:
+        """Store the model input size under ``config["data"]["input_size"]``.
+
+        Args:
+            size_value: (height, width) pair; ``None`` or a pair holding ``None`` is skipped.
+            config: The full OTX config dictionary, modified in place.
+        """
+        if size_value is not None and all(dim is not None for dim in size_value):
+            config["data"]["input_size"] = size_value
+            return
+        logging.info("Input size is not provided, skipping update.")
 
 
 def update_augmentations(augmentation_params: dict, config: dict) -> None:
     """Update augmentations in the config.
 
-    Example:
-        augmentation_params = {
-            random_affine = {
-                "enable": True,
-                "scaling_ratio_range": [0.1, 2.0]
-            },
-            gaussian_blur = {
-                "enable": True,
-                "kernel_size": 5
-            }
-            ...
-        }
+    Delegates to TransformsUpdater which handles the new CPU/GPU augmentation pipeline.
     """
-    if not augmentation_params:
-        return
-
-    tiling = config["data"]["tile_config"]["enable_tiler"]
-    # this list maps Geti user frendly naming to OTX aug classes
-    augs_mapping_list = {
-        "random_resize_crop": [
-            "otx.data.transform_libs.torchvision.EfficientNetRandomCrop",
-            "otx.data.transform_libs.torchvision.RandomResizedCrop",
-        ],
-        "random_affine": ["otx.data.transform_libs.torchvision.RandomAffine"],
-        "topdown_affine": ["otx.data.transform_libs.torchvision.TopdownAffine"],
-        "random_horizontal_flip": ["otx.data.transform_libs.torchvision.RandomFlip"],
-        "random_vertical_flip": ["torchvision.transforms.v2.RandomVerticalFlip"],
-        "gaussian_blur": ["otx.data.transform_libs.torchvision.RandomGaussianBlur"],
-        "gaussian_noise": ["otx.data.transform_libs.torchvision.RandomGaussianNoise"],
-        "color_jitter": ["torchvision.transforms.v2.RandomPhotometricDistort"],
-        "photometric_distort": ["otx.data.transform_libs.torchvision.PhotoMetricDistortion"],
-        "iou_random_crop": [
-            "otx.data.transform_libs.torchvision.MinIoURandomCrop",
-            "otx.data.transform_libs.torchvision.RandomIoUCrop",
-        ],
-        "random_zoom_out": ["torchvision.transforms.v2.RandomZoomOut"],
-        "hsv_random_aug": ["otx.data.transform_libs.torchvision.YOLOXHSVRandomAug"],
-        "mixup": ["otx.data.transform_libs.torchvision.CachedMixUp"],
-        "mosaic": ["otx.data.transform_libs.torchvision.CachedMosaic"],
-    }
-
-    for aug_name, aug_value in augmentation_params.items():
-        aug_classes = augs_mapping_list[aug_name]
-        found = False
-        for aug_config in config["data"]["train_subset"]["transforms"]:
-            if aug_config["class_path"] in aug_classes:
-                found = True
-                if "init_args" not in aug_config:
-                    aug_config["init_args"] = {}
-                if aug_name == "random_resize_crop" and not aug_value["enable"]:
-                    # if random crop is disabled -> change this augmentation to simple Resize
-                    aug_config["class_path"] = "otx.data.transform_libs.torchvision.Resize"
-                    break
-                if "TopdownAffine" in aug_config["class_path"]:
-                    affine_transforms_prob = aug_value.pop("probability", 1.0)
-                    if affine_transforms_prob is not None:
-                        aug_config["init_args"]["probability"] = affine_transforms_prob if aug_value["enable"] else 0.0
-                        if aug_config["init_args"]["probability"] < 0.7:
-                            for val_aug_cfg in config["data"]["val_subset"]["transforms"]:
-                                if "Pad" in val_aug_cfg["class_path"]:
-                                    val_aug_cfg["enable"] = False
-
-                    break
-
-                aug_config["enable"] = aug_value.pop("enable")
-                for parameter in aug_value:
-                    value = aug_value[parameter]
-                    if value is not None:
-                        override_parameter = (
-                            "p"
-                            if parameter == "probability" and "torchvision.transforms.v2" in aug_config["class_path"]
-                            else parameter
-                        )  # Geti consistency fix
-                        aug_config["init_args"][override_parameter] = value
-                break
-
-        if not found and not tiling:
-            msg = f"Augmentation {aug_name} is not found for this model."
-            raise ValueError(msg)
-        logging.info("This augmentation is not applicable in Tiling pipeline")
+    TransformsUpdater.update(augmentation_params, config)
 
 
 class GetiConfigConverter:
@@ -375,22 +550,31 @@ def _get_params(hyperparameters: dict) -> dict:
 
     @staticmethod
     def _update_params(config: dict, param_dict: dict) -> None:
-        """Update params of OTX recipe from Geit configurable params."""
+        """Update params of OTX recipe from Geti configurable params.
+
+        Uses TransformsUpdater and HyperparametersUpdater classes to apply updates
+        from the Geti model template to the OTX recipe config.
+        """
         augmentation_params = param_dict.get("dataset_preparation", {}).get("augmentation", {})
         tiling = augmentation_params.pop("tiling", None)
         training_parameters = param_dict.get("training", {})
 
-        update_tiling(tiling, config)
-        update_augmentations(augmentation_params, config)
-        update_learning_rate(training_parameters.get("learning_rate", None), config)
-        update_batch_size(training_parameters.get("batch_size", None), config)
-        update_num_iters(training_parameters.get("max_epochs", None), config)
-        update_early_stopping(training_parameters.get("early_stopping", None), config)
-        update_input_size(
-            training_parameters.get("input_size_height", None),
-            training_parameters.get("input_size_width", None),
-            config,
-        )
+        # Update augmentations and tiling
+        TransformsUpdater.update_tiling(tiling, config)
+        TransformsUpdater.update(augmentation_params, config)
+
+        # Update training hyperparameters
+        hyperparams = {
+            "learning_rate": training_parameters.get("learning_rate"),
+            "batch_size": training_parameters.get("batch_size"),
+            "max_epochs": training_parameters.get("max_epochs"),
+            "early_stopping": training_parameters.get("early_stopping"),
+            "input_size": (
+                training_parameters.get("input_size_height"),
+                training_parameters.get("input_size_width"),
+            ),
+        }
+        HyperparametersUpdater.update(hyperparams, config)
 
     @staticmethod
     def get_callback_idx(callbacks: list, name: str) -> int:
@@ -462,8 +646,8 @@ def instantiate(
             raise ValueError(msg)
         model_config["init_args"]["data_input_params"] = DataInputParams(
             input_size=datamodule.input_size,
-            mean=datamodule.input_mean,
-            std=datamodule.input_std,
+            mean=datamodule.input_mean if datamodule.input_mean is not None else (0.0, 0.0, 0.0),
+            std=datamodule.input_std if datamodule.input_std is not None else (1.0, 1.0, 1.0),
         ).as_dict()
         model_parser = ArgumentParser()
         model_parser.add_subclass_arguments(OTXModel, "model", required=False, fail_untyped=False, skip={"label_info"})
diff --git a/library/tests/assets/classification_dataset_16bit/data.parquet b/library/tests/assets/classification_dataset_16bit/data.parquet
new file mode 100644
index 00000000000..3bb06d42eef
Binary files /dev/null and b/library/tests/assets/classification_dataset_16bit/data.parquet differ
diff --git a/library/tests/assets/classification_dataset_16bit/download.py b/library/tests/assets/classification_dataset_16bit/download.py
new file mode 100644
index 00000000000..2f20584f3dd
--- /dev/null
+++ b/library/tests/assets/classification_dataset_16bit/download.py
@@ -0,0 +1,160 @@
+# Copyright (C) 2025-2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""Generate a synthetic 16-bit (uint16) classification dataset in Datumaro experimental format.
+
+The script:
+1. Creates synthetic 32x32 RGB images as ``int32`` tensors holding ``uint16``-range
+   values (0-65535) with two classes: "circle" and "square".
+2. Wraps every image in a ``ClassificationSample``.
+3. Stores all samples in a ``datumaro.experimental.Dataset``.
+4. Exports the dataset to disk with ``export_dataset``.
+
+This dataset exercises the 16-bit image path (``storage_dtype="uint16"``)
+in the data pipeline.
+
+Usage
+-----
+    python tests/assets/classification_dataset_16bit/download.py [--output_dir OUTPUT_DIR]
+
+The default output directory is
+``tests/assets/classification_dataset_16bit``.
+"""
+
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+import polars as pl
+import torch
+from datumaro.experimental import Dataset
+from datumaro.experimental.categories import LabelCategories
+from datumaro.experimental.export_import import export_dataset
+from datumaro.experimental.fields import ImageInfo as DmImageInfo
+from datumaro.experimental.fields import Subset
+from datumaro.experimental.fields.images import ImageField
+from torchvision import tv_tensors
+
+from otx.data.entity.sample import ClassificationSample
+
+CLASS_NAMES: tuple[str, ...] = ("circle", "square")
+SAMPLES_PER_CLASS_TRAIN: int = 4
+SAMPLES_PER_CLASS_VAL: int = 1
+SAMPLES_PER_CLASS_TEST: int = 1
+IMAGE_SIZE: int = 32
+
+
+def _make_circle_image(h: int, w: int) -> torch.Tensor:
+    """Create a 3xHxW int32 tensor (uint16 value range) with a bright circle in the centre."""
+    img = torch.zeros(3, h, w, dtype=torch.int32)
+    cy, cx = h // 2, w // 2
+    r = min(h, w) // 4
+    # Broadcast a column of row indices against a row of column indices instead of looping per pixel.
+    ys = torch.arange(h).unsqueeze(1)
+    xs = torch.arange(w).unsqueeze(0)
+    img[:, (ys - cy) ** 2 + (xs - cx) ** 2 <= r**2] = 50000  # bright circle on dark background
+    return img
+
+
+def _make_square_image(h: int, w: int) -> torch.Tensor:
+    """Create a 3xHxW int32 tensor (uint16 value range) with a bright square in the centre."""
+    img = torch.zeros(3, h, w, dtype=torch.int32)
+    cy, cx = h // 2, w // 2
+    half = min(h, w) // 4
+    img[:, cy - half : cy + half, cx - half : cx + half] = 40000
+    return img  # already int32, no cast needed
+
+
+def _generate_image(label_idx: int, seed: int) -> torch.Tensor:
+    """Generate a synthetic 3xHxW int32 image (uint16 value range) for the given class; 0 is the circle."""
+    torch.manual_seed(seed)
+    img = _make_circle_image(IMAGE_SIZE, IMAGE_SIZE) if label_idx == 0 else _make_square_image(IMAGE_SIZE, IMAGE_SIZE)
+    # Add slight random noise (0-999, reproducible per seed) to make images unique
+    noise = torch.randint(0, 1000, (3, IMAGE_SIZE, IMAGE_SIZE), dtype=torch.int32)
+    return (img + noise).clamp(0, 65535)  # int32 + int32 stays int32; clamp keeps the uint16 range
+
+
+def _build_dataset() -> Dataset:
+    """Build a datumaro.experimental.Dataset of ClassificationSample objects whose image field is UInt16."""
+    categories = {"label": LabelCategories(labels=CLASS_NAMES)}
+
+    # Override the inferred image field so the schema declares a UInt16, channels-first RGB image
+    schema = ClassificationSample.infer_schema()
+    img_attr = schema.attributes["image"]
+    img_attr.field = ImageField(
+        semantic="default",
+        dtype=pl.UInt16(),
+        channels_first=True,
+        format="RGB",
+    )
+    schema.attributes["image"] = img_attr
+
+    dataset: Dataset = Dataset(ClassificationSample, categories=categories, schema=schema)  # type: ignore[arg-type]
+
+    seed = 0  # bumped after every sample so each image is generated from its own seed
+    for subset, count in [
+        (Subset.TRAINING, SAMPLES_PER_CLASS_TRAIN),
+        (Subset.VALIDATION, SAMPLES_PER_CLASS_VAL),
+        (Subset.TESTING, SAMPLES_PER_CLASS_TEST),
+    ]:
+        for label_idx in range(len(CLASS_NAMES)):
+            for _ in range(count):  # `count` samples per class within this subset
+                img_tensor = _generate_image(label_idx, seed)
+                image = tv_tensors.Image(img_tensor)
+                sample = ClassificationSample(
+                    image=image,
+                    label=torch.tensor(label_idx, dtype=torch.uint8),
+                    dm_image_info=DmImageInfo(width=IMAGE_SIZE, height=IMAGE_SIZE),
+                    subset=subset,
+                )
+                dataset.append(sample)
+                seed += 1
+
+    return dataset
+
+
+def main(output_dir: Path | None = None) -> None:
+    """Generate the dataset and export it into ``output_dir`` (default: this script's directory)."""
+    import shutil
+    import tempfile
+
+    if output_dir is None:
+        output_dir = Path(__file__).resolve().parent  # the directory containing this script
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    print("Generating synthetic 16-bit classification dataset ...")
+    dataset = _build_dataset()
+    print(f"  Dataset length: {len(dataset)}")
+
+    # export_dataset requires the output dir to not exist,
+    # so export to a temp directory first, then move files.
+    with tempfile.TemporaryDirectory() as tmpdir:
+        export_path = Path(tmpdir) / "export"
+        print(f"Exporting dataset to {export_path} ...")
+        export_dataset(dataset, export_path)
+
+        # Move exported files to the output directory
+        for f in export_path.iterdir():
+            dest = output_dir / f.name
+            if dest.exists():  # clear whatever already sits at the destination before moving
+                if dest.is_dir():
+                    shutil.rmtree(dest)
+                else:
+                    dest.unlink()
+            shutil.move(str(f), str(dest))
+
+    print("Done ✓")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Generate a synthetic 16-bit classification dataset.")
+    parser.add_argument(
+        "--output_dir",
+        type=Path,
+        default=None,  # None makes main() fall back to the directory containing this script
+        help="Directory to save the dataset. Defaults to tests/assets/classification_dataset_16bit",
+    )
+    args = parser.parse_args()
+    main(output_dir=args.output_dir)
diff --git a/library/tests/assets/classification_dataset_16bit/metadata.json b/library/tests/assets/classification_dataset_16bit/metadata.json
new file mode 100644
index 00000000000..9b68f6e268f
--- /dev/null
+++ b/library/tests/assets/classification_dataset_16bit/metadata.json
@@ -0,0 +1,74 @@
+{
+  "version": "2.0.0",
+  "schema": {
+    "attributes": {
+      "image": {
+        "type": [
+          {
+            "name": "Image",
+            "module": "torchvision.tv_tensors._image"
+          },
+          {
+            "name": "Tensor",
+            "module": "torch"
+          }
+        ],
+        "type_module": "__union__",
+        "field": {
+          "type": "ImageField",
+          "semantic": "default",
+          "dtype": "UInt16",
+          "channels_first": true,
+          "format": "RGB"
+        }
+      },
+      "subset": {
+        "type": "Subset",
+        "type_module": "datumaro.experimental.fields.datasets",
+        "field": {
+          "type": "SubsetField",
+          "semantic": "default",
+          "categories": null,
+          "dtype": "Categorical"
+        }
+      },
+      "label": {
+        "type": "Tensor",
+        "type_module": "torch",
+        "field": {
+          "type": "LabelField",
+          "semantic": "default",
+          "dtype": "UInt8",
+          "multi_label": false,
+          "is_list": false
+        }
+      },
+      "dm_image_info": {
+        "type": "ImageInfo",
+        "type_module": "datumaro.experimental.fields.images",
+        "field": {
+          "type": "ImageInfoField",
+          "semantic": "default",
+          "dtype": "Struct({'width': Int32, 'height': Int32})"
+        }
+      }
+    },
+    "categories": {
+      "label": {
+        "type": "LabelCategories",
+        "labels": [
+          "circle",
+          "square"
+        ],
+        "group_type": "EXCLUSIVE",
+        "label_semantics": {}
+      }
+    }
+  },
+  "object_columns": [],
+  "videos": {
+    "fields": [],
+    "export_mode": "copy",
+    "original_paths": {}
+  }
+}
\ No newline at end of file
diff --git a/library/tests/assets/detection_coco/download.py b/library/tests/assets/detection_coco/download.py
index 0cd249176ed..4c874cce01e 100644
--- a/library/tests/assets/detection_coco/download.py
+++ b/library/tests/assets/detection_coco/download.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Download a ~50-sample COCO 2017 subset for object detection.
@@ -130,7 +130,7 @@ def _build_samples(
         sample = DetectionSample(
             image=image,
             label=torch.tensor(labels_list, dtype=torch.long),
-            bboxes=np.array(bboxes_list, dtype=np.float32),
+            bboxes=torch.tensor(bboxes_list, dtype=torch.float32),
             dm_image_info=DmImageInfo(width=IMG_SIZE, height=IMG_SIZE),
             subset=subset,
         )
diff --git a/library/tests/assets/hierarchical_classification_cifar100/data.parquet b/library/tests/assets/hierarchical_classification_cifar100/data.parquet
index 5dd99c605d7..689403efd62 100644
Binary files a/library/tests/assets/hierarchical_classification_cifar100/data.parquet and b/library/tests/assets/hierarchical_classification_cifar100/data.parquet differ
diff --git a/library/tests/assets/hierarchical_classification_cifar100/download.py b/library/tests/assets/hierarchical_classification_cifar100/download.py
index e946a7dde24..3390a1f9882 100644
--- a/library/tests/assets/hierarchical_classification_cifar100/download.py
+++ b/library/tests/assets/hierarchical_classification_cifar100/download.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Download a ~40-sample CIFAR-100 subset and export it as a hierarchical classification dataset.
@@ -375,7 +375,7 @@ def _add_items(
             PILImage.fromarray(img_np).save(images_dir / filename)
 
             # Hierarchical label: only fine label index (hierarchy is encoded in categories)
-            label = np.array([fine_remap[fine_label]], dtype=np.uint8)
+            label = torch.tensor([fine_remap[fine_label]], dtype=torch.long)
 
             sample = ClassificationHierarchicalSample(
                 image=image,
diff --git a/library/tests/assets/instance_segmentation_coco/download.py b/library/tests/assets/instance_segmentation_coco/download.py
index 80782ce3e8a..1dc3fa6c63a 100644
--- a/library/tests/assets/instance_segmentation_coco/download.py
+++ b/library/tests/assets/instance_segmentation_coco/download.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Download a ~50-sample COCO 2017 subset for instance segmentation.
@@ -148,7 +148,7 @@ def _build_dataset(
         if not bboxes_list:
             continue
 
-        bboxes = np.array(bboxes_list, dtype=np.float32)
+        bboxes = torch.tensor(bboxes_list, dtype=torch.float32)
         labels = torch.tensor(labels_list, dtype=torch.long)
         masks = tv_tensors.Mask(torch.from_numpy(np.stack(masks_list, axis=0).astype(np.uint8)))  # type: ignore[arg-type]
 
diff --git a/library/tests/assets/multilabel_classification_coco/download.py b/library/tests/assets/multilabel_classification_coco/download.py
index a20b0ca8765..5a50bfbebe8 100644
--- a/library/tests/assets/multilabel_classification_coco/download.py
+++ b/library/tests/assets/multilabel_classification_coco/download.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Download a ~50-sample COCO 2017 subset for multi-label classification.
@@ -121,7 +121,7 @@ def _build_samples(
         if not unique_cats:
             continue
 
-        label_indices = np.array([cat_id_to_idx[c] for c in unique_cats], dtype=np.uint8)
+        label_indices = torch.tensor([cat_id_to_idx[c] for c in unique_cats], dtype=torch.long)
 
         sample = ClassificationMultiLabelSample(
             image=image,
diff --git a/library/tests/conftest.py b/library/tests/conftest.py
index dd15658e917..49ce7596ef1 100644
--- a/library/tests/conftest.py
+++ b/library/tests/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2025 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
@@ -12,7 +12,7 @@
 from torch import LongTensor
 from torch.utils._pytree import register_pytree_node
 from torchvision import tv_tensors
-from torchvision.tv_tensors import Image, Mask
+from torchvision.tv_tensors import Mask
 
 from otx.data.entity.base import ImageInfo
 from otx.data.entity.sample import OTXPredictionBatch, OTXSampleBatch
@@ -299,6 +299,7 @@ def fxt_h_label_cls_data_entity() -> tuple[MockSample, OTXSampleBatch, OTXPredic
 def fxt_det_data_entity() -> tuple[tuple, MockSample, OTXSampleBatch]:
     img_size = (64, 64)
     fake_image = torch.zeros(size=(3, *img_size), dtype=torch.float32)
+    fake_images = fake_image.unsqueeze(0)  # (1, 3, H, W)
     fake_image_info = ImageInfo(img_idx=0, img_shape=img_size, ori_shape=img_size)
     fake_bboxes = tv_tensors.BoundingBoxes(data=torch.Tensor([0, 0, 5, 5]), format="xyxy", canvas_size=(10, 10))
     fake_labels = LongTensor([1])
@@ -310,13 +311,13 @@ def fxt_det_data_entity() -> tuple[tuple, MockSample, OTXSampleBatch]:
         label=fake_labels,
     )
     batch_data_entity = OTXSampleBatch(
-        images=[Image(fake_image)],
+        images=fake_images,
         imgs_info=[fake_image_info],
         bboxes=[fake_bboxes],
         labels=[fake_labels],
     )
     batch_pred_data_entity = OTXPredictionBatch(
-        images=[Image(fake_image)],
+        images=fake_images,
         imgs_info=[fake_image_info],
         bboxes=[fake_bboxes],
         labels=[fake_labels],
@@ -330,6 +331,7 @@ def fxt_det_data_entity() -> tuple[tuple, MockSample, OTXSampleBatch]:
 def fxt_inst_seg_data_entity() -> tuple[tuple, MockSample, OTXSampleBatch]:
     img_size = (64, 64)
     fake_image = torch.zeros(size=(3, *img_size), dtype=torch.float32)
+    fake_images = fake_image.unsqueeze(0)  # (1, 3, H, W)
     fake_image_info = ImageInfo(img_idx=0, img_shape=img_size, ori_shape=img_size)
     fake_bboxes = tv_tensors.BoundingBoxes(data=torch.Tensor([0, 0, 5, 5]), format="xyxy", canvas_size=(10, 10))
     fake_labels = LongTensor([1])
@@ -344,14 +346,14 @@ def fxt_inst_seg_data_entity() -> tuple[tuple, MockSample, OTXSampleBatch]:
         label=fake_labels,
     )
     batch_data_entity = OTXSampleBatch(
-        images=[Image(data=fake_image)],
+        images=fake_images,
         imgs_info=[fake_image_info],
         bboxes=[fake_bboxes],
         labels=[fake_labels],
         masks=[fake_masks],
     )
     batch_pred_data_entity = OTXPredictionBatch(
-        images=[Image(data=fake_image)],
+        images=fake_images,
         imgs_info=[fake_image_info],
         bboxes=[fake_bboxes],
         labels=[fake_labels],
@@ -364,7 +366,8 @@ def fxt_inst_seg_data_entity() -> tuple[tuple, MockSample, OTXSampleBatch]:
 @pytest.fixture(scope="session")
 def fxt_seg_data_entity() -> tuple[tuple, MockSample, OTXSampleBatch]:
     img_size = (32, 32)
-    fake_image = torch.zeros(size=(3, *img_size), dtype=torch.uint8).numpy()
+    fake_image = torch.zeros(size=(3, *img_size), dtype=torch.float32)
+    fake_images = fake_image.unsqueeze(0)  # (1, 3, H, W)
     fake_image_info = ImageInfo(img_idx=0, img_shape=img_size, ori_shape=img_size)
     fake_masks = Mask(torch.randint(low=0, high=2, size=img_size, dtype=torch.uint8))
     # define data entity
@@ -374,12 +377,12 @@ def fxt_seg_data_entity() -> tuple[tuple, MockSample, OTXSampleBatch]:
         masks=fake_masks,
     )
     batch_data_entity = OTXSampleBatch(
-        images=[Image(data=torch.from_numpy(fake_image))],
+        images=fake_images,
         imgs_info=[fake_image_info],
         masks=[fake_masks],
     )
     batch_pred_data_entity = OTXPredictionBatch(
-        images=[Image(data=torch.from_numpy(fake_image))],
+        images=fake_images,
         imgs_info=[fake_image_info],
         masks=[fake_masks],
         scores=[],
diff --git a/library/tests/integration/api/test_augmentation.py b/library/tests/integration/api/test_augmentation.py
deleted file mode 100644
index d5aecc2a644..00000000000
--- a/library/tests/integration/api/test_augmentation.py
+++ /dev/null
@@ -1,111 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from __future__ import annotations
-
-import itertools
-
-import pytest
-from datumaro.experimental.export_import import import_dataset
-
-from otx.config.data import SamplerConfig, SubsetConfig
-from otx.data.factory import OTXDatasetFactory
-from otx.tools.auto_configurator import AutoConfigurator
-from otx.types.task import OTXTaskType
-
-
-def _test_augmentation(
-    recipe: str,
-    target_dataset_per_task: dict,
-    configurable_augs: list[str],
-) -> None:
-    # Load recipe
-    recipe_tokens = recipe.split("/")
-    task_name = recipe_tokens[-2]
-    task = OTXTaskType(task_name.upper())
-    config = AutoConfigurator(
-        data_root=target_dataset_per_task[task_name],
-        task=task,
-    ).config
-    train_config = config["data"]["train_subset"]
-    input_size = 32
-    train_config["input_size"] = (input_size, input_size)
-
-    # Load dataset
-    dm_dataset = import_dataset(target_dataset_per_task[task_name])
-
-    # Extract sampler config once before the loop
-    sampler_config = train_config.pop("sampler", {})
-
-    # Evaluate all on/off aug combinations
-    for switches in itertools.product([True, False], repeat=len(configurable_augs)):
-        # Configure on/off
-        for aug_name, switch in zip(configurable_augs, switches):
-            aug_found = False
-            for aug_config in train_config["transforms"]:
-                if aug_name in aug_config["class_path"]:
-                    aug_config["enable"] = switch
-                    aug_found = True
-                    break
-            assert aug_found, f"{aug_name} not found in {recipe}"
-        # Create dataset
-        dataset = OTXDatasetFactory.create(
-            task=task,
-            dm_subset=dm_dataset,
-            cfg_subset=SubsetConfig(sampler=SamplerConfig(**sampler_config), **train_config),
-        )
-        # Check if all aug combinations are size-compatible
-        sample = dataset[0]
-        assert sample.image.shape == (3, input_size, input_size)
-
-
-CLS_RECIPES = [recipe for recipe in pytest.RECIPE_LIST if "_cls" in recipe and "tv_" not in recipe]
-DET_RECIPES = [recipe for recipe in pytest.RECIPE_LIST if "/detection/" in recipe and "deim" not in recipe]
-INST_SEG_RECIPES = [recipe for recipe in pytest.RECIPE_LIST if "/instance_segmentation/" in recipe]
-SEM_SEG_RECIPES = [recipe for recipe in pytest.RECIPE_LIST if "/semantic_segmentation/" in recipe]
-KP_DET_RECIPES = [recipe for recipe in pytest.RECIPE_LIST if "/keypoint_detection/" in recipe]
-
-
-@pytest.mark.parametrize("recipe", CLS_RECIPES + DET_RECIPES + INST_SEG_RECIPES)
-def test_augmentation(
-    recipe: str,
-    fxt_target_dataset_per_task: dict,
-):
-    configurable_augs = [
-        "RandomPhotometricDistort",
-        "RandomAffine",
-        "RandomVerticalFlip",
-        "RandomGaussianBlur",
-        "RandomFlip",
-        "RandomGaussianNoise",
-    ]
-    _test_augmentation(recipe, fxt_target_dataset_per_task, configurable_augs)
-
-
-@pytest.mark.parametrize("recipe", SEM_SEG_RECIPES)
-def test_augmentation_seg(
-    recipe: str,
-    fxt_target_dataset_per_task: dict,
-):
-    configurable_augs = [
-        "PhotoMetricDistortion",
-        "RandomAffine",
-        "RandomVerticalFlip",
-        "RandomGaussianBlur",
-        "RandomFlip",
-        "RandomGaussianNoise",
-    ]
-    _test_augmentation(recipe, fxt_target_dataset_per_task, configurable_augs)
-
-
-@pytest.mark.parametrize("recipe", KP_DET_RECIPES)
-def test_augmentation_kp_det(
-    recipe: str,
-    fxt_target_dataset_per_task: dict,
-):
-    configurable_augs = [
-        "RandomPhotometricDistort",
-        "RandomGaussianBlur",
-        "RandomGaussianNoise",
-    ]
-    _test_augmentation(recipe, fxt_target_dataset_per_task, configurable_augs)
diff --git a/library/tests/integration/api/test_engine.py b/library/tests/integration/api/test_engine.py
index ddff67aff89..047fe192975 100644
--- a/library/tests/integration/api/test_engine.py
+++ b/library/tests/integration/api/test_engine.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Integration test for the core OTX engine workflow.
@@ -89,6 +89,11 @@ class _TaskSpec(NamedTuple):
         recipe_name="rtmpose_tiny",
         dataset_dir="keypoint_detection_coco",
     ),
+    _TaskSpec(
+        task=OTXTaskType.MULTI_CLASS_CLS,
+        recipe_name="tv_mobilenet_v3_small",
+        dataset_dir="classification_dataset_16bit",
+    ),
 ]
 
 
@@ -117,7 +122,10 @@ def _resolve_recipe(spec: _TaskSpec) -> str:
 
 def _id_fn(spec: _TaskSpec) -> str:
     """Readable test-ID for ``pytest.mark.parametrize``."""
-    return f"{spec.task.value}-{spec.recipe_name}"
+    base = f"{spec.task.value}-{spec.recipe_name}"
+    if "16bit" in spec.dataset_dir:
+        return f"{base}-16bit"
+    return base
 
 
 # Filter specs based on ``--task`` CLI option (populated via ``pytest.TASK_LIST``
diff --git a/library/tests/integration/conftest.py b/library/tests/integration/conftest.py
index 7f4a88de9f5..21e340ebb3d 100644
--- a/library/tests/integration/conftest.py
+++ b/library/tests/integration/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 from __future__ import annotations
@@ -150,6 +150,7 @@ def fxt_asset_dir() -> Path:
 def fxt_target_dataset_per_task() -> dict:
     return {
         "multi_class_cls": "tests/assets/classification_cifar10",
+        "multi_class_cls_16bit": "tests/assets/classification_dataset_16bit",
         "multi_label_cls": "tests/assets/multilabel_classification_coco",
         "h_label_cls": "tests/assets/hierarchical_classification_cifar100",
         "detection": "tests/assets/detection_coco",
diff --git a/library/tests/perf_v2/tasks/detection.py b/library/tests/perf_v2/tasks/detection.py
index 1ddfbb6637d..b6705ccd904 100644
--- a/library/tests/perf_v2/tasks/detection.py
+++ b/library/tests/perf_v2/tasks/detection.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """OTX object detection performance benchmark."""
@@ -28,11 +28,7 @@
     ModelInfo(task=TASK_TYPE.value, name="deim_dfine_m", category="other"),
     ModelInfo(task=TASK_TYPE.value, name="deimv2_m", category="other"),
     ModelInfo(task=TASK_TYPE.value, name="deimv2_s", category="other"),
-    ModelInfo(task=TASK_TYPE.value, name="atss_resnext101", category="other"),
-    ModelInfo(task=TASK_TYPE.value, name="rtdetr_101", category="other"),
-    ModelInfo(task=TASK_TYPE.value, name="rtdetr_18", category="other"),
     ModelInfo(task=TASK_TYPE.value, name="rtdetr_50", category="other"),
-    ModelInfo(task=TASK_TYPE.value, name="rtmdet_tiny", category="other"),
     ModelInfo(task=TASK_TYPE.value, name="ssd_mobilenetv2", category="other"),
     ModelInfo(task=TASK_TYPE.value, name="yolox_tiny", category="other"),
     ModelInfo(task=TASK_TYPE.value, name="yolox_l", category="other"),
diff --git a/library/tests/test_helpers.py b/library/tests/test_helpers.py
new file mode 100644
index 00000000000..a686d45f798
--- /dev/null
+++ b/library/tests/test_helpers.py
@@ -0,0 +1,64 @@
+# Copyright (C) 2024-2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+"""Helper functions for tests."""
+
+from pathlib import Path
+
+import numpy as np
+
+
+def generate_random_bboxes(
+    image_width: int,
+    image_height: int,
+    num_boxes: int,
+    min_width: int = 10,
+    min_height: int = 10,
+) -> np.ndarray:
+    """Generate pseudo-random bounding boxes from a fixed seed (same output for the same arguments).
+
+    Args:
+        image_width (int): Width of the image.
+        image_height (int): Height of the image.
+        num_boxes (int): Number of bounding boxes to generate.
+        min_width (int): Minimum width of the bounding box. Default is 10.
+        min_height (int): Minimum height of the bounding box. Default is 10.
+
+    Returns:
+        ndarray: A NumPy array of shape (N, 4), N <= num_boxes (boxes with non-positive area are
+            dropped), in format (x_min, y_min, x_max, y_max).
+    """
+    max_width = image_width - min_width
+    max_height = image_height - min_height
+
+    bg = np.random.MT19937(seed=42)
+    rg = np.random.Generator(bg)
+
+    x_min = rg.integers(0, max_width, size=num_boxes)
+    y_min = rg.integers(0, max_height, size=num_boxes)
+    x_max = x_min + rg.integers(min_width, image_width, size=num_boxes)
+    y_max = y_min + rg.integers(min_height, image_height, size=num_boxes)
+
+    x_max[x_max > image_width] = image_width
+    y_max[y_max > image_height] = image_height
+    areas = (x_max - x_min) * (y_max - y_min)
+    bboxes = np.column_stack((x_min, y_min, x_max, y_max))
+    return bboxes[areas > 0]
+
+
+def find_folder(base_path: Path, folder_name: str) -> Path:
+    """Find the folder with the given name within the specified base path.
+
+    Args:
+        base_path (Path): The base path to search within.
+        folder_name (str): The name of the folder to find.
+
+    Returns:
+        Path: The first matching directory (recursive search); FileNotFoundError is raised if none exists.
+    """
+    for folder_path in base_path.rglob(folder_name):
+        if folder_path.is_dir():
+            return folder_path
+    msg = f"Folder {folder_name} not found in {base_path}."
+    raise FileNotFoundError(msg)
diff --git a/library/tests/unit/backend/native/callbacks/test_aug_scheduler.py b/library/tests/unit/backend/native/callbacks/test_aug_scheduler.py
index 53063721c98..23214655c67 100644
--- a/library/tests/unit/backend/native/callbacks/test_aug_scheduler.py
+++ b/library/tests/unit/backend/native/callbacks/test_aug_scheduler.py
@@ -1,7 +1,9 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-"""Tests for data augmentation scheduler components."""
+"""Tests for data augmentation scheduler components (CPU/GPU pipeline)."""
+
+from __future__ import annotations
 
 import secrets
 from multiprocessing import Value
@@ -10,421 +12,496 @@
 import pytest
 import torch
 from lightning.pytorch import LightningModule, Trainer
-from torchvision.transforms.v2 import Compose, ToDtype
+from lightning.pytorch.callbacks.callback import Callback
 
 from otx.backend.native.callbacks.aug_scheduler import AugmentationSchedulerCallback, DataAugSwitch
-
-
+from otx.data.augmentation import CPUAugmentationPipeline, GPUAugmentationPipeline
+
+# ---------------------------------------------------------------------------
+# Helpers / fixtures shared across test classes
+# ---------------------------------------------------------------------------
+
+
+def _make_minimal_policies(
+    *,
+    cpu_class: str = "otx.data.augmentation.transforms.Resize",
+    gpu_class: str = "kornia.augmentation.Normalize",
+) -> dict:
+    """Return a 4-policy dict: Resize (CPU) + Normalize (GPU); strong policies also prepend a GPU horizontal flip."""
+    cpu_entry = {
+        "class_path": cpu_class,
+        "init_args": {"size": [640, 640], "keep_aspect_ratio": False},
+    }
+    gpu_entry = {
+        "class_path": gpu_class,
+        "init_args": {"mean": [0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]},
+    }
+    gpu_entry_flip = {
+        "class_path": "kornia.augmentation.RandomHorizontalFlip",
+        "init_args": {"p": 0.5},
+    }
+    return {
+        "no_aug": {
+            "augmentations_cpu": [cpu_entry],
+            "augmentations_gpu": [gpu_entry],
+        },
+        "strong_aug_1": {
+            "augmentations_cpu": [cpu_entry],
+            "augmentations_gpu": [gpu_entry_flip, gpu_entry],
+        },
+        "strong_aug_2": {
+            "augmentations_cpu": [cpu_entry],
+            "augmentations_gpu": [gpu_entry_flip, gpu_entry],
+        },
+        "light_aug": {
+            "augmentations_cpu": [cpu_entry],
+            "augmentations_gpu": [gpu_entry],
+        },
+    }
+
+
+POLICY_EPOCHS = [4, 23]
+
+
+# ===================================================================
+# TestDataAugSwitch
+# ===================================================================
 class TestDataAugSwitch:
-    """Test cases for DataAugSwitch."""
+    """Tests for DataAugSwitch with the CPU/GPU pipeline architecture."""
+
+    # -- fixtures -------------------------------------------------------
 
     @pytest.fixture
-    def sample_policies(self):
-        """Create sample augmentation policies."""
-        return {
-            "no_aug": {
-                "to_tv_image": True,
-                "transforms": [
-                    {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.float32"}},
-                ],
-            },
-            "strong_aug_1": {
-                "to_tv_image": True,
-                "transforms": [
-                    {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.float32"}},
-                ],
-            },
-            "strong_aug_2": {
-                "to_tv_image": False,
-                "transforms": [
-                    {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.int32"}},
-                ],
-            },
-            "light_aug": {
-                "to_tv_image": True,
-                "transforms": [
-                    {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.float32"}},
-                ],
-            },
-        }
+    def policies(self):
+        return _make_minimal_policies()
 
     @pytest.fixture
-    def policy_epochs(self):
-        """Create sample policy epochs."""
-        return [4, 29, 50]
+    def switch(self, policies):
+        return DataAugSwitch(POLICY_EPOCHS, policies, input_size=[640, 640])
 
     @pytest.fixture
-    def data_aug_switch(self, policy_epochs, sample_policies):
-        """Create a DataAugSwitch instance."""
-        with patch("otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate") as mock_generate:
-            # Mock the transform generation to return simple transforms
-            mock_generate.return_value = Compose([ToDtype(dtype=torch.float32)])
-            return DataAugSwitch(policy_epochs, sample_policies)
-
-    def test_init_valid_policy_epochs(self, policy_epochs, sample_policies):
-        """Test DataAugSwitch initialization with valid policy epochs."""
-        with patch("otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate") as mock_generate:
-            mock_generate.return_value = Compose([ToDtype(dtype=torch.float32)])
-            switch = DataAugSwitch(policy_epochs, sample_policies)
-
-            assert switch.policy_epochs == policy_epochs
-            assert len(switch.policies) == len(sample_policies)
-            assert switch._shared_epoch is None
-
-    def test_init_invalid_policy_epochs(self, sample_policies):
-        """Test DataAugSwitch initialization with invalid policy epochs."""
-        invalid_epochs = [4, 29]  # Only 2 epochs instead of 3
-
-        with pytest.raises(ValueError, match="Expected 3 policy epochs"):
-            DataAugSwitch(invalid_epochs, sample_policies)
-
-    def test_set_shared_epoch(self, data_aug_switch):
-        """Test setting shared epoch."""
-        shared_epoch = Value("i", 0)
-        data_aug_switch.set_shared_epoch(shared_epoch)
-
-        assert data_aug_switch._shared_epoch is shared_epoch
-
-    def test_epoch_property_without_shared_epoch(self, data_aug_switch):
-        """Test epoch property when shared epoch is not set."""
-        with pytest.raises(ValueError, match="Shared epoch not set"):
-            _ = data_aug_switch.epoch
+    def switch_with_epoch(self, switch):
+        """Switch with a shared epoch pre-set to 0."""
+        switch.set_shared_epoch(Value("i", 0))
+        return switch
+
+    # -- init -----------------------------------------------------------
+
+    def test_init_stores_policy_epochs(self, switch):
+        assert switch.policy_epochs == POLICY_EPOCHS
+
+    def test_init_stores_input_size_as_tuple(self, switch):
+        assert switch.input_size == (640, 640)
+
+    def test_init_builds_cpu_pipeline_per_policy(self, switch):
+        for name in ("no_aug", "strong_aug_1", "strong_aug_2", "light_aug"):
+            assert name in switch.policies
+            assert isinstance(switch.policies[name]["cpu_pipeline"], CPUAugmentationPipeline)
+
+    def test_init_stores_gpu_configs_per_policy(self, switch):
+        for name in ("no_aug", "strong_aug_1", "strong_aug_2", "light_aug"):
+            assert isinstance(switch.policies[name]["gpu_aug_configs"], list)
+            assert len(switch.policies[name]["gpu_aug_configs"]) >= 1
+
+    def test_init_invalid_policy_epochs_length(self, policies):
+        with pytest.raises(ValueError, match="Expected 2 policy epochs"):
+            DataAugSwitch([4, 29, 50], policies)
+
+    def test_init_no_input_size(self, policies):
+        switch = DataAugSwitch(POLICY_EPOCHS, policies, input_size=None)
+        assert switch.input_size is None
+
+    def test_init_empty_gpu_augmentations(self):
+        """Policy with no GPU augmentations should store an empty list."""
+        policies = {
+            name: {
+                "augmentations_cpu": [
+                    {
+                        "class_path": "otx.data.augmentation.transforms.Resize",
+                        "init_args": {"size": [640, 640], "keep_aspect_ratio": False},
+                    },
+                ],
+            }
+            for name in ("no_aug", "strong_aug_1", "strong_aug_2", "light_aug")
+        }
+        switch = DataAugSwitch(POLICY_EPOCHS, policies, input_size=[640, 640])
+        assert switch.get_gpu_aug_configs("no_aug") == []
 
-    def test_epoch_property_with_shared_epoch(self, data_aug_switch):
-        """Test epoch property when shared epoch is set."""
-        shared_epoch = Value("i", 10)
-        data_aug_switch.set_shared_epoch(shared_epoch)
+    # -- shared epoch ---------------------------------------------------
 
-        assert data_aug_switch.epoch == 10
+    def test_set_shared_epoch(self, switch):
+        v = Value("i", 7)
+        switch.set_shared_epoch(v)
+        assert switch._shared_epoch is v
 
-    def test_epoch_setter_without_shared_epoch(self, data_aug_switch):
-        """Test epoch setter when shared epoch is not set."""
+    def test_epoch_getter_raises_without_shared(self, switch):
         with pytest.raises(ValueError, match="Shared epoch not set"):
-            data_aug_switch.epoch = 5
-
-    def test_epoch_setter_with_shared_epoch(self, data_aug_switch):
-        """Test epoch setter when shared epoch is set."""
-        shared_epoch = Value("i", 0)
-        data_aug_switch.set_shared_epoch(shared_epoch)
-
-        data_aug_switch.epoch = 15
-        assert data_aug_switch.epoch == 15
-        assert shared_epoch.value == 15
-
-    def test_current_policy_name_no_aug_stage(self, data_aug_switch):
-        """Test current_policy_name in no_aug stage (epoch < 4)."""
-        shared_epoch = Value("i", 2)
-        data_aug_switch.set_shared_epoch(shared_epoch)
-
-        assert data_aug_switch.current_policy_name == "no_aug"
-
-    def test_current_policy_name_strong_aug_stage(self, data_aug_switch):
-        """Test current_policy_name in strong_aug stage (4 <= epoch < 29)."""
-        shared_epoch = Value("i", 15)
-        data_aug_switch.set_shared_epoch(shared_epoch)
+            _ = switch.epoch
 
-        with patch.object(secrets, "choice", return_value="strong_aug_1"):
-            policy_name = data_aug_switch.current_policy_name
-            assert policy_name in ["strong_aug_1", "strong_aug_2"]
-
-    def test_current_policy_name_light_aug_stage(self, data_aug_switch):
-        """Test current_policy_name in light_aug stage (epoch >= 29)."""
-        shared_epoch = Value("i", 35)
-        data_aug_switch.set_shared_epoch(shared_epoch)
-
-        assert data_aug_switch.current_policy_name == "light_aug"
-
-    def test_current_policy_name_boundary_conditions(self, data_aug_switch):
-        """Test current_policy_name at boundary conditions."""
-        shared_epoch = Value("i", 0)
-        data_aug_switch.set_shared_epoch(shared_epoch)
-
-        # Test exact boundary values
-        test_cases = [
-            (3, "no_aug"),  # Just before first boundary
-            (4, "strong_aug_1"),  # At first boundary (mocked)
-            (28, "strong_aug_2"),  # Just before second boundary (mocked)
-            (29, "light_aug"),  # At second boundary
-            (50, "light_aug"),  # Beyond all boundaries
-        ]
-
-        for epoch, expected_stage in test_cases:
-            data_aug_switch.epoch = epoch
-            if expected_stage in ["strong_aug_1", "strong_aug_2"]:
-                with patch.object(secrets, "choice", return_value=expected_stage):
-                    assert data_aug_switch.current_policy_name == expected_stage
-            else:
-                assert data_aug_switch.current_policy_name == expected_stage
-
-    def test_current_transforms_property(self, data_aug_switch):
-        """Test current_transforms property."""
-        shared_epoch = Value("i", 2)
-        data_aug_switch.set_shared_epoch(shared_epoch)
-
-        to_tv_image, transforms = data_aug_switch.current_transforms
-
-        assert isinstance(to_tv_image, bool)
-        assert isinstance(transforms, Compose)
-
-    def test_secrets_choice_randomness(self, data_aug_switch):
-        """Test that secrets.choice is used for random selection."""
-        shared_epoch = Value("i", 15)  # In strong_aug stage
-        data_aug_switch.set_shared_epoch(shared_epoch)
+    def test_epoch_setter_raises_without_shared(self, switch):
+        with pytest.raises(ValueError, match="Shared epoch not set"):
+            switch.epoch = 5
 
-        with patch.object(secrets, "choice") as mock_choice:
-            mock_choice.return_value = "strong_aug_1"
-            policy_name = data_aug_switch.current_policy_name
+    def test_epoch_getter_and_setter(self, switch_with_epoch):
+        switch_with_epoch.epoch = 12
+        assert switch_with_epoch.epoch == 12
 
-            mock_choice.assert_called_once_with(["strong_aug_1", "strong_aug_2"])
-            assert policy_name == "strong_aug_1"
+    # -- current_policy_name (stochastic) --------------------------------
 
-    def test_policy_processing_during_init(self, policy_epochs, sample_policies):
-        """Test that policies are properly processed during initialization."""
-        with patch("otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate") as mock_generate:
-            mock_transform = Compose([ToDtype(dtype=torch.float32)])
-            mock_generate.return_value = mock_transform
+    def test_policy_name_no_aug_stage(self, switch_with_epoch):
+        for e in (0, 1, 3):
+            switch_with_epoch.epoch = e
+            assert switch_with_epoch.current_policy_name == "no_aug"
 
-            switch = DataAugSwitch(policy_epochs, sample_policies)
+    def test_policy_name_strong_aug_stage_random(self, switch_with_epoch):
+        switch_with_epoch.epoch = 10
+        with patch.object(secrets, "choice", return_value="strong_aug_2") as m:
+            assert switch_with_epoch.current_policy_name == "strong_aug_2"
+            m.assert_called_once_with(["strong_aug_1", "strong_aug_2"])
 
-            # Check that generate was called for each policy
-            assert mock_generate.call_count == len(sample_policies)
+    def test_policy_name_light_aug_stage(self, switch_with_epoch):
+        for e in (23, 30, 40, 100):
+            switch_with_epoch.epoch = e
+            assert switch_with_epoch.current_policy_name == "light_aug"
 
-            # Check that policies were processed correctly
-            for policy_name in sample_policies:
-                assert policy_name in switch.policies
-                assert "to_tv_image" in switch.policies[policy_name]
-                assert "transforms" in switch.policies[policy_name]
-                assert switch.policies[policy_name]["transforms"] is mock_transform
+    def test_policy_name_boundary_at_p0(self, switch_with_epoch):
+        """epoch == p0 should enter strong_aug stage."""
+        switch_with_epoch.epoch = 4
+        with patch.object(secrets, "choice", return_value="strong_aug_1"):
+            assert switch_with_epoch.current_policy_name == "strong_aug_1"
+
+    def test_policy_name_boundary_at_p1(self, switch_with_epoch):
+        """epoch == p1 should enter light_aug stage."""
+        switch_with_epoch.epoch = 23
+        assert switch_with_epoch.current_policy_name == "light_aug"
+
+    # -- get_cpu_pipeline -----------------------------------------------
+
+    def test_get_cpu_pipeline_returns_correct_type(self, switch_with_epoch):
+        pipeline = switch_with_epoch.get_cpu_pipeline("no_aug")
+        assert isinstance(pipeline, CPUAugmentationPipeline)
+
+    def test_get_cpu_pipeline_returns_different_per_policy(self, switch_with_epoch):
+        no_aug = switch_with_epoch.get_cpu_pipeline("no_aug")
+        light_aug = switch_with_epoch.get_cpu_pipeline("light_aug")
+        assert isinstance(no_aug, CPUAugmentationPipeline)
+        assert isinstance(light_aug, CPUAugmentationPipeline)
+
+    def test_get_cpu_pipeline_invalid_name_raises(self, switch_with_epoch):
+        with pytest.raises(KeyError):
+            switch_with_epoch.get_cpu_pipeline("nonexistent_policy")
+
+    # -- get_gpu_aug_configs --------------------------------------------
+
+    def test_get_gpu_aug_configs_returns_list(self, switch):
+        configs = switch.get_gpu_aug_configs("no_aug")
+        assert isinstance(configs, list)
+        assert len(configs) == 1  # just Normalize
+
+    def test_get_gpu_aug_configs_strong_has_flip(self, switch):
+        configs = switch.get_gpu_aug_configs("strong_aug_1")
+        class_paths = [c["class_path"] for c in configs]
+        assert "kornia.augmentation.RandomHorizontalFlip" in class_paths
+
+    # -- build_gpu_pipeline ---------------------------------------------
+
+    def test_build_gpu_pipeline_returns_correct_type(self, switch):
+        gpu = switch.build_gpu_pipeline("no_aug", data_keys=["input", "bbox_xyxy", "label"])
+        assert isinstance(gpu, GPUAugmentationPipeline)
+
+    def test_build_gpu_pipeline_data_keys_propagated(self, switch):
+        keys = ["input", "bbox_xyxy", "label"]
+        gpu = switch.build_gpu_pipeline("strong_aug_1", data_keys=keys)
+        assert gpu.data_keys == keys
+
+    def test_build_gpu_pipeline_empty_configs(self):
+        """Policy with no GPU augmentations produces an empty pipeline."""
+        policies = {
+            name: {
+                "augmentations_cpu": [
+                    {
+                        "class_path": "otx.data.augmentation.transforms.Resize",
+                        "init_args": {"size": [640, 640], "keep_aspect_ratio": False},
+                    },
+                ],
+            }
+            for name in ("no_aug", "strong_aug_1", "strong_aug_2", "light_aug")
+        }
+        switch = DataAugSwitch(POLICY_EPOCHS, policies, input_size=[640, 640])
+        gpu = switch.build_gpu_pipeline("no_aug")
+        assert isinstance(gpu, GPUAugmentationPipeline)
 
 
+# ===================================================================
+# TestAugmentationSchedulerCallback
+# ===================================================================
 class TestAugmentationSchedulerCallback:
-    """Test cases for AugmentationSchedulerCallback."""
+    """Tests for AugmentationSchedulerCallback with GPU pipeline swapping."""
 
-    @pytest.fixture
-    def mock_data_aug_switch(self):
-        """Create a mock DataAugSwitch."""
-        return MagicMock(spec=DataAugSwitch)
+    # -- fixtures -------------------------------------------------------
 
     @pytest.fixture
-    def callback_with_switch(self, mock_data_aug_switch):
-        """Create callback with DataAugSwitch."""
-        return AugmentationSchedulerCallback(mock_data_aug_switch)
+    def switch(self):
+        s = DataAugSwitch(POLICY_EPOCHS, _make_minimal_policies(), input_size=[640, 640])
+        s.set_shared_epoch(Value("i", 0))
+        return s
 
     @pytest.fixture
-    def callback_without_switch(self):
-        """Create callback without DataAugSwitch."""
-        return AugmentationSchedulerCallback()
+    def mock_gpu_callback(self):
+        from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
+
+        cb = MagicMock(spec=GPUAugmentationCallback)
+        mock_pipeline = MagicMock(spec=GPUAugmentationPipeline)
+        mock_pipeline.data_keys = ["input", "bbox_xyxy", "label"]
+        cb._train_pipeline = mock_pipeline
+        return cb
 
     @pytest.fixture
-    def mock_trainer(self):
-        """Create a mock trainer."""
+    def mock_trainer(self, mock_gpu_callback):
         trainer = MagicMock(spec=Trainer)
-        trainer.current_epoch = 10
+        trainer.current_epoch = 0
+        trainer.callbacks = [mock_gpu_callback]
         return trainer
 
     @pytest.fixture
     def mock_pl_module(self):
-        """Create a mock Lightning module."""
-        return MagicMock(spec=LightningModule)
+        pl = MagicMock(spec=LightningModule)
+        param = torch.nn.Parameter(torch.zeros(1))
+        pl.parameters.side_effect = lambda: iter([param])
+        pl.device = torch.device("cpu")
+        return pl
 
-    def test_init_with_data_aug_switch(self, mock_data_aug_switch):
-        """Test callback initialization with DataAugSwitch."""
-        callback = AugmentationSchedulerCallback(mock_data_aug_switch)
-
-        assert callback.data_aug_switch is mock_data_aug_switch
-
-    def test_init_without_data_aug_switch(self):
-        """Test callback initialization without DataAugSwitch."""
-        callback = AugmentationSchedulerCallback()
-
-        assert callback.data_aug_switch is None
+    @pytest.fixture
+    def callback(self, switch):
+        return AugmentationSchedulerCallback(data_aug_switch=switch)
 
-    def test_set_data_aug_switch(self, callback_without_switch, mock_data_aug_switch):
-        """Test setting DataAugSwitch after initialization."""
-        callback_without_switch.set_data_aug_switch(mock_data_aug_switch)
+    # -- init -----------------------------------------------------------
 
-        assert callback_without_switch.data_aug_switch is mock_data_aug_switch
+    def test_inherits_from_lightning_callback(self, callback):
+        assert isinstance(callback, Callback)
 
-    def test_on_train_epoch_start_with_switch(self, callback_with_switch, mock_trainer, mock_pl_module):
-        """Test on_train_epoch_start when DataAugSwitch is available."""
-        callback_with_switch.on_train_epoch_start(mock_trainer, mock_pl_module)
+    def test_init_with_switch(self, switch):
+        cb = AugmentationSchedulerCallback(data_aug_switch=switch)
+        assert cb.data_aug_switch is switch
+        assert cb._gpu_aug_callback is None
+        assert cb._last_gpu_policy is None
 
-        # Check that epoch was set on the DataAugSwitch
-        assert callback_with_switch.data_aug_switch.epoch == mock_trainer.current_epoch
+    def test_init_without_switch(self):
+        cb = AugmentationSchedulerCallback()
+        assert cb.data_aug_switch is None
 
-    def test_on_train_epoch_start_without_switch(self, callback_without_switch, mock_trainer, mock_pl_module):
-        """Test on_train_epoch_start when DataAugSwitch is not available."""
-        # This should not raise an exception but will fail due to None
-        with pytest.raises(AttributeError):
-            callback_without_switch.on_train_epoch_start(mock_trainer, mock_pl_module)
+    # -- setup ----------------------------------------------------------
 
-    def test_on_train_epoch_start_updates_epoch(self, callback_with_switch, mock_pl_module):
-        """Test that on_train_epoch_start updates epoch correctly."""
-        # Test different epoch values
-        for epoch in [0, 5, 10, 25, 50]:
-            mock_trainer = MagicMock(spec=Trainer)
-            mock_trainer.current_epoch = epoch
+    def test_setup_finds_gpu_callback(self, callback, mock_trainer, mock_pl_module, mock_gpu_callback):
+        callback.setup(mock_trainer, mock_pl_module, stage="fit")
+        assert callback._gpu_aug_callback is mock_gpu_callback
 
-            callback_with_switch.on_train_epoch_start(mock_trainer, mock_pl_module)
+    def test_setup_no_gpu_callback(self, callback, mock_pl_module):
+        trainer = MagicMock(spec=Trainer)
+        trainer.callbacks = []
+        callback.setup(trainer, mock_pl_module, stage="fit")
+        assert callback._gpu_aug_callback is None
 
-            assert callback_with_switch.data_aug_switch.epoch == epoch
+    # -- set_data_aug_switch --------------------------------------------
 
-    def test_callback_inheritance(self, callback_with_switch):
-        """Test that callback properly inherits from Lightning Callback."""
-        from lightning.pytorch.callbacks.callback import Callback
+    def test_set_data_aug_switch(self, callback, switch):
+        new_switch = MagicMock(spec=DataAugSwitch)
+        callback.set_data_aug_switch(new_switch)
+        assert callback.data_aug_switch is new_switch
 
-        assert isinstance(callback_with_switch, Callback)
+    # -- on_train_epoch_start -------------------------------------------
 
-    def test_set_data_aug_switch_replaces_existing(self, callback_with_switch):
-        """Test that set_data_aug_switch replaces existing switch."""
-        original_switch = callback_with_switch.data_aug_switch
-        new_switch = MagicMock(spec=DataAugSwitch)
+    def test_epoch_start_noop_when_no_switch(self, mock_trainer, mock_pl_module):
+        cb = AugmentationSchedulerCallback(data_aug_switch=None)
+        cb.on_train_epoch_start(mock_trainer, mock_pl_module)  # should not raise
 
-        callback_with_switch.set_data_aug_switch(new_switch)
+    def test_epoch_start_updates_shared_epoch(self, callback, mock_trainer, mock_pl_module, mock_gpu_callback):
+        callback.setup(mock_trainer, mock_pl_module, stage="fit")
+        mock_trainer.current_epoch = 7
+        callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+        assert callback.data_aug_switch.epoch == 7
 
-        assert callback_with_switch.data_aug_switch is new_switch
-        assert callback_with_switch.data_aug_switch is not original_switch
+    def test_epoch_start_swaps_gpu_on_phase_change(self, callback, mock_trainer, mock_pl_module, mock_gpu_callback):
+        callback.setup(mock_trainer, mock_pl_module, stage="fit")
+        mock_trainer.current_epoch = 0
+        callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+        assert callback._last_gpu_policy == "no_aug"
+        assert mock_gpu_callback._train_pipeline is not None
 
-    def test_multiple_epoch_updates(self, callback_with_switch, mock_pl_module):
-        """Test multiple epoch updates during training."""
-        epochs = [0, 1, 2, 5, 10, 15, 20, 25, 30]
+    def test_epoch_start_no_swap_same_phase(self, callback, mock_trainer, mock_pl_module, mock_gpu_callback):
+        callback.setup(mock_trainer, mock_pl_module, stage="fit")
 
-        for epoch in epochs:
-            mock_trainer = MagicMock(spec=Trainer)
-            mock_trainer.current_epoch = epoch
+        mock_trainer.current_epoch = 0
+        callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+        assert callback._last_gpu_policy == "no_aug"
 
-            callback_with_switch.on_train_epoch_start(mock_trainer, mock_pl_module)
+        mock_trainer.current_epoch = 1
+        callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+        assert callback._last_gpu_policy == "no_aug"
 
-            # Verify the epoch was set correctly
-            callback_with_switch.data_aug_switch.epoch = epoch
+    def test_epoch_start_detects_phase_transitions(self, callback, mock_trainer, mock_pl_module, mock_gpu_callback):
+        callback.setup(mock_trainer, mock_pl_module, stage="fit")
 
+        mock_trainer.current_epoch = 0
+        callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+        assert callback._last_gpu_policy == "no_aug"
 
-class TestDataAugSwitchIntegration:
-    """Integration tests for DataAugSwitch and AugmentationSchedulerCallback."""
+        mock_trainer.current_epoch = 4
+        callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+        assert callback._last_gpu_policy in ("strong_aug_1", "strong_aug_2")
 
-    @pytest.fixture
-    def sample_policies(self):
-        """Create sample augmentation policies."""
-        return {
-            "no_aug": {
-                "to_tv_image": True,
-                "transforms": [
-                    {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.float32"}},
-                ],
-            },
-            "strong_aug_1": {
-                "to_tv_image": True,
-                "transforms": [
-                    {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.float32"}},
-                ],
-            },
-            "strong_aug_2": {
-                "to_tv_image": False,
-                "transforms": [
-                    {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.int32"}},
-                ],
-            },
-            "light_aug": {
-                "to_tv_image": True,
-                "transforms": [
-                    {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.float32"}},
+        mock_trainer.current_epoch = 23
+        callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+        assert callback._last_gpu_policy == "light_aug"
+
+    # -- _swap_gpu_pipeline ---------------------------------------------
+
+    def test_swap_gpu_pipeline_noop_without_gpu_callback(self, callback, mock_pl_module):
+        callback._gpu_aug_callback = None
+        callback._swap_gpu_pipeline("no_aug", mock_pl_module)  # should not crash
+
+    def test_swap_gpu_pipeline_noop_without_switch(self, mock_pl_module):
+        cb = AugmentationSchedulerCallback(data_aug_switch=None)
+        cb._gpu_aug_callback = MagicMock()
+        cb._swap_gpu_pipeline("no_aug", mock_pl_module)  # should not crash
+
+    def test_swap_gpu_pipeline_builds_and_assigns(self, callback, mock_trainer, mock_pl_module, mock_gpu_callback):
+        callback.setup(mock_trainer, mock_pl_module, stage="fit")
+        callback._swap_gpu_pipeline("strong_aug_1", mock_pl_module)
+        new_pipeline = mock_gpu_callback._train_pipeline
+        assert isinstance(new_pipeline, GPUAugmentationPipeline)
+
+    def test_swap_gpu_pipeline_preserves_data_keys(self, callback, mock_trainer, mock_pl_module, mock_gpu_callback):
+        callback.setup(mock_trainer, mock_pl_module, stage="fit")
+        callback._swap_gpu_pipeline("strong_aug_1", mock_pl_module)
+        new_pipeline = mock_gpu_callback._train_pipeline
+        assert isinstance(new_pipeline, GPUAugmentationPipeline)
+        assert new_pipeline.data_keys == ["input", "bbox_xyxy", "label"]
+
+    def test_swap_gpu_pipeline_skips_empty_configs(self, mock_pl_module):
+        """If a policy has no GPU augmentations, replace the pipeline with an empty one."""
+        policies = {
+            name: {
+                "augmentations_cpu": [
+                    {
+                        "class_path": "otx.data.augmentation.transforms.Resize",
+                        "init_args": {"size": [640, 640], "keep_aspect_ratio": False},
+                    },
                 ],
-            },
+            }
+            for name in ("no_aug", "strong_aug_1", "strong_aug_2", "light_aug")
         }
+        switch = DataAugSwitch(POLICY_EPOCHS, policies, input_size=[640, 640])
+        switch.set_shared_epoch(Value("i", 0))
 
-    @pytest.fixture
-    def integration_setup(self, sample_policies):
-        """Set up DataAugSwitch and AugmentationSchedulerCallback for integration testing."""
-        with patch("otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate") as mock_generate:
-            mock_generate.return_value = Compose([ToDtype(dtype=torch.float32)])
-
-            # Create DataAugSwitch
-            switch = DataAugSwitch([4, 29, 50], sample_policies)
-
-            # Create shared epoch
-            shared_epoch = Value("i", 0)
-            switch.set_shared_epoch(shared_epoch)
-
-            # Create callback
-            callback = AugmentationSchedulerCallback(switch)
-
-            return switch, callback, shared_epoch
-
-    def test_full_training_simulation(self, integration_setup):
-        """Test full training simulation with epoch updates."""
-        switch, callback, shared_epoch = integration_setup
-
-        # Simulate training epochs
-        test_epochs = [0, 3, 4, 15, 28, 29, 35, 50]
-        expected_policies = [
-            "no_aug",
-            "no_aug",
-            "strong_aug",
-            "strong_aug",
-            "strong_aug",
-            "light_aug",
-            "light_aug",
-            "light_aug",
-        ]
-
-        for epoch, expected_policy_type in zip(test_epochs, expected_policies):
-            # Simulate trainer epoch update
-            mock_trainer = MagicMock(spec=Trainer)
-            mock_trainer.current_epoch = epoch
-            mock_pl_module = MagicMock(spec=LightningModule)
+        from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
 
-            # Update epoch via callback
-            callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+        mock_gpu_cb = MagicMock(spec=GPUAugmentationCallback)
+        original_pipeline = MagicMock()
+        mock_gpu_cb._train_pipeline = original_pipeline
 
-            # Check that shared epoch was updated
-            assert shared_epoch.value == epoch
-            assert switch.epoch == epoch
+        cb = AugmentationSchedulerCallback(data_aug_switch=switch)
+        cb._gpu_aug_callback = mock_gpu_cb
+        cb._swap_gpu_pipeline("no_aug", mock_pl_module)
 
-            # Check policy type
-            current_policy = switch.current_policy_name
-            if expected_policy_type == "strong_aug":
-                assert current_policy in ["strong_aug_1", "strong_aug_2"]
-            else:
-                assert current_policy == expected_policy_type
+        # A policy with no GPU augmentations must replace the GPU pipeline with an
+        # empty one rather than leave the previous policy's stale transforms in place.
+        assert mock_gpu_cb._train_pipeline is not original_pipeline
+        assert isinstance(mock_gpu_cb._train_pipeline, GPUAugmentationPipeline)
+        assert mock_gpu_cb._train_pipeline.aug_sequential is None
 
-    def test_concurrent_access_simulation(self, integration_setup):
-        """Test simulation of concurrent access to shared epoch."""
-        switch, callback, shared_epoch = integration_setup
+    # -- full training simulation ----------------------------------------
 
-        # Simulate callback updating epoch
-        mock_trainer = MagicMock(spec=Trainer)
-        mock_trainer.current_epoch = 15
-        mock_pl_module = MagicMock(spec=LightningModule)
+    def test_full_training_simulation(self, callback, mock_trainer, mock_pl_module, mock_gpu_callback):
+        """Simulate a full 50-epoch training run through all three augmentation phases."""
+        callback.setup(mock_trainer, mock_pl_module, stage="fit")
 
-        callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+        phase_history = []
+        for epoch in range(50):
+            mock_trainer.current_epoch = epoch
+            callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+            phase_history.append(callback._last_gpu_policy)
 
-        # Simulate dataset reading epoch (this would happen in parallel)
-        current_epoch = switch.epoch
-        policy_name = switch.current_policy_name
+        assert all(p == "no_aug" for p in phase_history[:4])
+        assert all(p in ("strong_aug_1", "strong_aug_2") for p in phase_history[4:23])
+        assert all(p == "light_aug" for p in phase_history[23:])
 
-        assert current_epoch == 15
-        assert policy_name in ["strong_aug_1", "strong_aug_2"]
+    def test_error_without_shared_epoch(self):
+        """Callback should propagate ValueError if shared epoch not set."""
+        switch = DataAugSwitch(POLICY_EPOCHS, _make_minimal_policies(), input_size=[640, 640])
+        cb = AugmentationSchedulerCallback(data_aug_switch=switch)
 
-        # Verify both callback and switch see the same epoch
-        assert callback.data_aug_switch.epoch == 15
+        mock_trainer = MagicMock(spec=Trainer)
+        mock_trainer.current_epoch = 10
+        mock_pl_module = MagicMock(spec=LightningModule)
 
-    def test_error_handling_integration(self, sample_policies):
-        """Test error handling in integration scenarios."""
-        with patch("otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate") as mock_generate:
-            mock_generate.return_value = Compose([ToDtype(dtype=torch.float32)])
+        with pytest.raises(ValueError, match="Shared epoch not set"):
+            cb.on_train_epoch_start(mock_trainer, mock_pl_module)
 
-            # Create switch without shared epoch
-            switch = DataAugSwitch([4, 29, 50], sample_policies)
-            callback = AugmentationSchedulerCallback(switch)
 
-            # Try to use without setting shared epoch
-            with pytest.raises(ValueError, match="Shared epoch not set"):
-                _ = switch.current_policy_name
+# ===================================================================
+# Integration-style tests with real pipelines
+# ===================================================================
+class TestDataAugSwitchIntegration:
+    """Integration tests using real CPUAugmentationPipeline and GPUAugmentationPipeline."""
 
-            # Try to update epoch via callback without shared epoch
-            mock_trainer = MagicMock(spec=Trainer)
-            mock_trainer.current_epoch = 10
-            mock_pl_module = MagicMock(spec=LightningModule)
+    @pytest.fixture
+    def switch(self):
+        return DataAugSwitch(
+            policy_epochs=POLICY_EPOCHS,
+            policies=_make_minimal_policies(),
+            input_size=[640, 640],
+        )
+
+    def test_all_policies_have_callable_cpu_pipeline(self, switch):
+        for name, policy in switch.policies.items():
+            pipeline = policy["cpu_pipeline"]
+            assert callable(pipeline), f"CPU pipeline for '{name}' is not callable"
+
+    def test_build_gpu_pipeline_produces_aug_sequential(self, switch):
+        gpu = switch.build_gpu_pipeline("strong_aug_1", data_keys=["input", "bbox_xyxy", "label"])
+        assert gpu.aug_sequential is not None
+        assert len(gpu._augmentations_list) == 2
+
+    def test_gpu_pipeline_normalize_extraction(self, switch):
+        gpu = switch.build_gpu_pipeline("no_aug", data_keys=["input", "bbox_xyxy", "label"])
+        assert gpu.mean is not None
+        assert gpu.std is not None
+        # mean/std may be tuple or tensor depending on implementation
+        mean_t = torch.tensor(gpu.mean) if not isinstance(gpu.mean, torch.Tensor) else gpu.mean
+        std_t = torch.tensor(gpu.std) if not isinstance(gpu.std, torch.Tensor) else gpu.std
+        assert torch.allclose(mean_t, torch.tensor([0.485, 0.456, 0.406]), atol=1e-4)
+        assert torch.allclose(std_t, torch.tensor([0.229, 0.224, 0.225]), atol=1e-4)
+
+    def test_end_to_end_phase_cycle(self):
+        switch = DataAugSwitch(POLICY_EPOCHS, _make_minimal_policies(), input_size=[640, 640])
+        switch.set_shared_epoch(Value("i", 0))
+
+        expected_phases = {
+            0: "no_aug",
+            3: "no_aug",
+            4: ("strong_aug_1", "strong_aug_2"),
+            22: ("strong_aug_1", "strong_aug_2"),
+            23: "light_aug",
+            40: "light_aug",
+        }
 
-            with pytest.raises(ValueError, match="Shared epoch not set"):
-                callback.on_train_epoch_start(mock_trainer, mock_pl_module)
+        for epoch, expected in expected_phases.items():
+            switch.epoch = epoch
+            policy = switch.current_policy_name
+            if isinstance(expected, tuple):
+                assert policy in expected, f"Epoch {epoch}: expected one of {expected}, got {policy}"
+            else:
+                assert policy == expected, f"Epoch {epoch}: expected {expected}, got {policy}"
+
+    def test_concurrent_access_simulation(self):
+        """Shared epoch is mp.Value, safe for concurrent read/write."""
+        switch = DataAugSwitch(POLICY_EPOCHS, _make_minimal_policies(), input_size=[640, 640])
+        shared = Value("i", 0)
+        switch.set_shared_epoch(shared)
+
+        # Simulate callback writing
+        switch.epoch = 15
+        # Simulate worker reading
+        assert switch.epoch == 15
+        assert switch.current_policy_name in ("strong_aug_1", "strong_aug_2")
diff --git a/library/tests/unit/backend/native/models/classification/test_efficientnet.py b/library/tests/unit/backend/native/models/classification/test_efficientnet.py
index cbe1188f66f..0ecba163f0a 100644
--- a/library/tests/unit/backend/native/models/classification/test_efficientnet.py
+++ b/library/tests/unit/backend/native/models/classification/test_efficientnet.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
@@ -58,7 +58,9 @@ def test_set_input_size(self):
             label_info=10,
             data_input_params=data_input_params,
         )
-        assert model.model.backbone.in_size == data_input_params.input_size[-2:]
+        input_size = data_input_params.input_size
+        assert input_size is not None
+        assert model.model.backbone.in_size == input_size[-2:]
 
     def test_freeze_backbone(self):
         data_input_params = DataInputParams((300, 300), (0.0, 0.0, 0.0), (1.0, 1.0, 1.0))
@@ -128,7 +130,9 @@ def test_set_input_size(self):
             label_info=10,
             data_input_params=data_input_params,
         )
-        assert model.model.backbone.in_size == data_input_params.input_size[-2:]
+        input_size = data_input_params.input_size
+        assert input_size is not None
+        assert model.model.backbone.in_size == input_size[-2:]
 
     def test_freeze_backbone(self):
         data_input_params = DataInputParams((300, 300), (0.0, 0.0, 0.0), (1.0, 1.0, 1.0))
@@ -198,7 +202,9 @@ def test_set_input_size(self, fxt_hlabel_data):
             label_info=fxt_hlabel_data,
             data_input_params=data_input_params,
         )
-        assert model.model.backbone.in_size == data_input_params.input_size[-2:]
+        input_size = data_input_params.input_size
+        assert input_size is not None
+        assert model.model.backbone.in_size == input_size[-2:]
 
     def test_freeze_backbone(self, fxt_hlabel_data):
         data_input_params = DataInputParams((300, 300), (0.0, 0.0, 0.0), (1.0, 1.0, 1.0))
diff --git a/library/tests/unit/backend/native/models/classification/test_mobilenet_v3.py b/library/tests/unit/backend/native/models/classification/test_mobilenet_v3.py
index c34a6646c13..f2941226d5b 100644
--- a/library/tests/unit/backend/native/models/classification/test_mobilenet_v3.py
+++ b/library/tests/unit/backend/native/models/classification/test_mobilenet_v3.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 import pytest
@@ -58,7 +58,9 @@ def test_set_input_size(self):
             label_info=10,
             data_input_params=data_input_params,
         )
-        assert model.model.backbone.in_size == data_input_params.input_size[-2:]
+        input_size = data_input_params.input_size
+        assert input_size is not None
+        assert model.model.backbone.in_size == input_size[-2:]
 
     def test_freeze_backbone(self):
         data_input_params = DataInputParams((300, 300), (0.0, 0.0, 0.0), (1.0, 1.0, 1.0))
@@ -128,7 +130,9 @@ def test_set_input_size(self):
             label_info=10,
             data_input_params=data_input_params,
         )
-        assert model.model.backbone.in_size == data_input_params.input_size[-2:]
+        input_size = data_input_params.input_size
+        assert input_size is not None
+        assert model.model.backbone.in_size == input_size[-2:]
 
     def test_freeze_backbone(self):
         data_input_params = DataInputParams((300, 300), (0.0, 0.0, 0.0), (1.0, 1.0, 1.0))
@@ -198,7 +202,9 @@ def test_set_input_size(self, fxt_hlabel_data):
             label_info=fxt_hlabel_data,
             data_input_params=data_input_params,
         )
-        assert model.model.backbone.in_size == data_input_params.input_size[-2:]
+        input_size = data_input_params.input_size
+        assert input_size is not None
+        assert model.model.backbone.in_size == input_size[-2:]
 
     def test_freeze_backbone(self, fxt_hlabel_data):
         data_input_params = DataInputParams((300, 300), (0.0, 0.0, 0.0), (1.0, 1.0, 1.0))
diff --git a/library/tests/unit/backend/native/models/detection/conftest.py b/library/tests/unit/backend/native/models/detection/conftest.py
index 4d09642b755..b68742c57fb 100644
--- a/library/tests/unit/backend/native/models/detection/conftest.py
+++ b/library/tests/unit/backend/native/models/detection/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 """Test of custom algo modules of OTX Detection task."""
 
@@ -13,7 +13,7 @@
 @pytest.fixture
 def fxt_detection_batch(batch_size: int = 2) -> OTXSampleBatch:
     """Create a mock detection batch for testing."""
-    images = [torch.randn(3, 640, 640), torch.randn(3, 640, 640)]
+    images = torch.stack([torch.randn(3, 640, 640), torch.randn(3, 640, 640)])
 
     # Create bounding boxes and labels for each image
     bboxes = [
diff --git a/library/tests/unit/backend/native/models/detection/test_deimv2.py b/library/tests/unit/backend/native/models/detection/test_deimv2.py
index c825eab5a72..5259b26c378 100644
--- a/library/tests/unit/backend/native/models/detection/test_deimv2.py
+++ b/library/tests/unit/backend/native/models/detection/test_deimv2.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Unit tests for DEIMV2 detection model."""
@@ -307,8 +307,8 @@ def test_default_preprocessing_params(self) -> None:
         default_params = model._default_preprocessing_params
         assert isinstance(default_params, DataInputParams)
         assert default_params.input_size == (640, 640)
-        assert default_params.mean == (123.675, 116.280, 103.530)
-        assert default_params.std == (58.395, 57.120, 57.375)
+        assert default_params.mean == (0.485, 0.456, 0.406)
+        assert default_params.std == (0.229, 0.224, 0.225)
 
     def test_inheritance_from_deim_dfine(self) -> None:
         """Test that DEIMV2 properly inherits from DEIMDFine."""
diff --git a/library/tests/unit/backend/native/models/detection/test_rfdetr.py b/library/tests/unit/backend/native/models/detection/test_rfdetr.py
index 79c627fc797..47ca3d1cefb 100644
--- a/library/tests/unit/backend/native/models/detection/test_rfdetr.py
+++ b/library/tests/unit/backend/native/models/detection/test_rfdetr.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Unit tests for RF-DETR detection model."""
@@ -436,13 +436,13 @@ def test_default_preprocessing_params(self) -> None:
             label_info=3,
         )
 
-        # Check that default params use 0-255 range normalization
+        # Check that default params use 0-1 range normalization
         default_params = model._default_preprocessing_params
         assert "rfdetr_base" in default_params
         assert default_params["rfdetr_base"].input_size == (560, 560)
-        # ImageNet mean in 0-255 range
-        assert default_params["rfdetr_base"].mean == (123.675, 116.28, 103.53)
-        assert default_params["rfdetr_base"].std == (58.395, 57.12, 57.375)
+        # ImageNet mean in 0-1 range
+        assert default_params["rfdetr_base"].mean == (0.485, 0.456, 0.406)
+        assert default_params["rfdetr_base"].std == (0.229, 0.224, 0.225)
 
     def test_optimizer_configuration(self) -> None:
         """Test that optimizer configuration is properly set."""
diff --git a/library/tests/unit/backend/native/models/instance_segmentation/conftest.py b/library/tests/unit/backend/native/models/instance_segmentation/conftest.py
index 6f3d227594f..6f49b68e79b 100644
--- a/library/tests/unit/backend/native/models/instance_segmentation/conftest.py
+++ b/library/tests/unit/backend/native/models/instance_segmentation/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 """Test of custom algo modules of OTX Detection task."""
 
@@ -46,7 +46,7 @@ def fxt_instance_seg_batch(batch_size: int = 2) -> OTXSampleBatch:
     ]
 
     return OTXSampleBatch(
-        images=images,
+        images=torch.stack(images),
         bboxes=bboxes,
         labels=labels,
         masks=masks,
diff --git a/library/tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py b/library/tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py
index 54dcc6d56dc..eeda67e19d7 100644
--- a/library/tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py
+++ b/library/tests/unit/backend/native/models/instance_segmentation/heads/test_rtmdet_inst_head.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 """Unit test of rtmdet_ins_heads of OTX Instance Segmentation tasks."""
 
@@ -130,7 +130,7 @@ def test_prepare_loss_inputs(self, mocker, rtmdet_ins_head: RTMDetInstHead) -> N
         polygons[1] = np.array([[0, 0], [0, 1], [1, 1], [1, 0]])
 
         entity = OTXSampleBatch(
-            images=[torch.randn(3, 640, 640), torch.randn(3, 640, 640)],
+            images=torch.stack([torch.randn(3, 640, 640), torch.randn(3, 640, 640)]),
             imgs_info=[
                 ImageInfo(0, img_shape=(640, 640), ori_shape=(640, 640)),
                 ImageInfo(1, img_shape=(640, 640), ori_shape=(640, 640)),
diff --git a/library/tests/unit/backend/native/models/instance_segmentation/test_rfdetr_inst.py b/library/tests/unit/backend/native/models/instance_segmentation/test_rfdetr_inst.py
index 8658664bb0a..4a9d04d1b35 100644
--- a/library/tests/unit/backend/native/models/instance_segmentation/test_rfdetr_inst.py
+++ b/library/tests/unit/backend/native/models/instance_segmentation/test_rfdetr_inst.py
@@ -46,13 +46,13 @@ def test_default_preprocessing_params(self) -> None:
             label_info=3,
         )
 
-        # Check that default params use 0-255 range normalization
+        # Check that default params use 0-1 range normalization
         default_params = model._default_preprocessing_params
         assert "rfdetr_seg_n" in default_params
         assert default_params["rfdetr_seg_n"].input_size == (312, 312)
-        # ImageNet mean in 0-255 range
-        assert default_params["rfdetr_seg_n"].mean == (123.675, 116.28, 103.53)
-        assert default_params["rfdetr_seg_n"].std == (58.395, 57.12, 57.375)
+        # ImageNet mean in 0-1 range
+        assert default_params["rfdetr_seg_n"].mean == (0.485, 0.456, 0.406)
+        assert default_params["rfdetr_seg_n"].std == (0.229, 0.224, 0.225)
 
     def test_optimizer_configuration(self) -> None:
         """Test that optimizer configuration is properly set."""
diff --git a/library/tests/unit/backend/native/models/segmentation/test_base.py b/library/tests/unit/backend/native/models/segmentation/test_base.py
index 1703a12ad7a..25006d2f0e1 100644
--- a/library/tests/unit/backend/native/models/segmentation/test_base.py
+++ b/library/tests/unit/backend/native/models/segmentation/test_base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 """Unit tests for segmentation model entity."""
@@ -114,6 +114,7 @@ def test_customize_outputs_predict(self, model, batch_data_entity):
         customized_outputs = model._customize_outputs(outputs, batch_data_entity)
         assert isinstance(customized_outputs, OTXPredictionBatch)
         assert len(customized_outputs.scores) == 0
+        assert isinstance(customized_outputs.images, torch.Tensor)
         assert customized_outputs.images.shape == (2, 3, 224, 224)
         assert customized_outputs.imgs_info == []
 
diff --git a/library/tests/unit/backend/native/tools/explain/test_saliency_map_processing.py b/library/tests/unit/backend/native/tools/explain/test_saliency_map_processing.py
index 860735180d7..8bac7193916 100644
--- a/library/tests/unit/backend/native/tools/explain/test_saliency_map_processing.py
+++ b/library/tests/unit/backend/native/tools/explain/test_saliency_map_processing.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 import numpy as np
@@ -144,7 +144,7 @@ def test_process_image(postprocess) -> None:
 
 def _get_pred_result_multiclass(pred_labels, pred_scores) -> OTXPredictionBatch:
     return OTXPredictionBatch(
-        images=[torch.randn(3, OUT_SIZE, OUT_SIZE, dtype=torch.float32) for _ in range(BATCH_SIZE)],
+        images=torch.stack([torch.randn(3, OUT_SIZE, OUT_SIZE, dtype=torch.float32) for _ in range(BATCH_SIZE)]),
         imgs_info=IMGS_INFO,
         scores=pred_scores,
         labels=pred_labels,
@@ -155,7 +155,7 @@ def _get_pred_result_multiclass(pred_labels, pred_scores) -> OTXPredictionBatch:
 
 def _get_pred_result_multilabel(pred_labels, pred_scores) -> OTXPredictionBatch:
     return OTXPredictionBatch(
-        images=[torch.randn(3, OUT_SIZE, OUT_SIZE, dtype=torch.float32) for _ in range(BATCH_SIZE)],
+        images=torch.stack([torch.randn(3, OUT_SIZE, OUT_SIZE, dtype=torch.float32) for _ in range(BATCH_SIZE)]),
         imgs_info=IMGS_INFO,
         scores=pred_scores,
         labels=pred_labels,
@@ -166,7 +166,7 @@ def _get_pred_result_multilabel(pred_labels, pred_scores) -> OTXPredictionBatch:
 
 def _get_pred_result_hcls(pred_labels, pred_scores) -> OTXPredictionBatch:
     return OTXPredictionBatch(
-        images=[torch.randn(3, OUT_SIZE, OUT_SIZE, dtype=torch.float32) for _ in range(BATCH_SIZE)],
+        images=torch.stack([torch.randn(3, OUT_SIZE, OUT_SIZE, dtype=torch.float32) for _ in range(BATCH_SIZE)]),
         imgs_info=IMGS_INFO,
         scores=pred_scores,
         labels=pred_labels,
diff --git a/library/tests/unit/backend/openvino/models/test_base.py b/library/tests/unit/backend/openvino/models/test_base.py
index 1512a4e0dc8..2ca341d1edc 100644
--- a/library/tests/unit/backend/openvino/models/test_base.py
+++ b/library/tests/unit/backend/openvino/models/test_base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 """Unit tests of the OpenVINO base model."""
 
@@ -19,7 +19,7 @@ class TestOVModel:
     @pytest.fixture
     def input_batch(self) -> OTXSampleBatch:
         image = [torch.rand(3, 10, 10) for _ in range(3)]
-        return OTXSampleBatch(images=image, labels=[])
+        return OTXSampleBatch(images=torch.stack(image), labels=[])
 
     @pytest.fixture
     def model(self, get_dummy_ov_cls_model) -> OVModel:
diff --git a/library/tests/unit/cli/utils/test_jsonargparse.py b/library/tests/unit/cli/utils/test_jsonargparse.py
index 70a8443695a..1f04b42b4c9 100644
--- a/library/tests/unit/cli/utils/test_jsonargparse.py
+++ b/library/tests/unit/cli/utils/test_jsonargparse.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
@@ -29,7 +29,7 @@ def fxt_configs() -> Namespace:
                 num_workers=4,
                 transforms=[
                     {
-                        "class_path": "otx.data.transform_libs.torchvision.Resize",
+                        "class_path": "otx.data.augmentation.transforms.Resize",
                         "init_args": {
                             "keep_ratio": True,
                             "transform_bbox": True,
@@ -38,12 +38,12 @@ def fxt_configs() -> Namespace:
                         },
                     },
                     {
-                        "class_path": "otx.data.transform_libs.torchvision.Pad",
+                        "class_path": "torchvision.transforms.v2.Pad",
                         "init_args": {"pad_to_square": True, "transform_mask": True},
                     },
                     {
-                        "class_path": "otx.data.transform_libs.torchvision.RandomFlip",
-                        "init_args": {"prob": 0.5, "is_numpy_to_tvtensor": True},
+                        "class_path": "torchvision.transforms.v2.RandomHorizontalFlip",
+                        "init_args": {"p": 0.5},
                     },
                     {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.float32"}},
                     {
@@ -171,14 +171,14 @@ def test_namespace_override(fxt_configs) -> None:
             train_subset=Namespace(
                 transforms=[
                     {
-                        "class_path": "otx.data.transform_libs.torchvision.Resize",
+                        "class_path": "otx.data.augmentation.transforms.Resize",
                         "init_args": {
                             "keep_ratio": False,  # for boolean
                             "scale": [512, 512],  # for tuple
                         },
                     },
                     {
-                        "class_path": "otx.data.transform_libs.torchvision.Pad",
+                        "class_path": "torchvision.transforms.v2.Pad",
                         "init_args": {"size_divisor": 32},  # add new key
                     },
                     {
@@ -190,30 +190,38 @@ def test_namespace_override(fxt_configs) -> None:
         )
 
         # to check before adding key
-        assert "size_divisor" not in cfg.data.train_subset.transforms[1]["init_args"]
+        pad_transform = next(
+            t for t in cfg.data.train_subset.transforms if t["class_path"] == "torchvision.transforms.v2.Pad"
+        )
+        assert "size_divisor" not in pad_transform["init_args"]
 
         namespace_override(configs=cfg, key="data", overrides=overrides, convert_dict_to_namespace=False)
 
-        # otx.data.transform_libs.torchvision.Resize
-        assert (
-            cfg.data.train_subset.transforms[0]["init_args"]["keep_ratio"]
-            == overrides.train_subset.transforms[0]["init_args"]["keep_ratio"]
+        # Find transforms by class_path since order may change
+        resize_transform = next(
+            t for t in cfg.data.train_subset.transforms if t["class_path"] == "otx.data.augmentation.transforms.Resize"
+        )
+        pad_transform = next(
+            t for t in cfg.data.train_subset.transforms if t["class_path"] == "torchvision.transforms.v2.Pad"
         )
+        normalize_transform = next(
+            t for t in cfg.data.train_subset.transforms if t["class_path"] == "torchvision.transforms.v2.Normalize"
+        )
+
+        # otx.data.augmentation.transforms.Resize
         assert (
-            cfg.data.train_subset.transforms[0]["init_args"]["scale"]
-            == overrides.train_subset.transforms[0]["init_args"]["scale"]
+            resize_transform["init_args"]["keep_ratio"]
+            == overrides.train_subset.transforms[0]["init_args"]["keep_ratio"]
         )
-        # otx.data.transform_libs.torchvision.Pad
-        assert "size_divisor" in cfg.data.train_subset.transforms[1]["init_args"]
+        assert resize_transform["init_args"]["scale"] == overrides.train_subset.transforms[0]["init_args"]["scale"]
+        # torchvision.transforms.v2.Pad
+        assert "size_divisor" in pad_transform["init_args"]
         assert (
-            cfg.data.train_subset.transforms[1]["init_args"]["size_divisor"]
+            pad_transform["init_args"]["size_divisor"]
             == overrides.train_subset.transforms[1]["init_args"]["size_divisor"]
         )
         # torchvision.transforms.v2.Normalize
-        assert (
-            cfg.data.train_subset.transforms[-1]["init_args"]["std"]
-            == overrides.train_subset.transforms[-1]["init_args"]["std"]
-        )
+        assert normalize_transform["init_args"]["std"] == overrides.train_subset.transforms[-1]["init_args"]["std"]
 
         # test for appending new transform
         overrides = Namespace(
@@ -265,9 +273,21 @@ def test_namespace_override(fxt_configs) -> None:
 def test_list_override(fxt_configs) -> None:
     with patch_update_configs():
         list_override(fxt_configs, "callbacks", [])
-        assert fxt_configs.callbacks[0].init_args.prog_bar
-        assert fxt_configs.callbacks[1].init_args.patience == 10
-        assert fxt_configs.callbacks[2].init_args.max_depth == 1
+        # Find callbacks by class_path since order may change with the new list_override behavior
+        iter_timer = next(
+            c
+            for c in fxt_configs.callbacks
+            if c.class_path == "otx.backend.native.callbacks.iteration_timer.IterationTimer"
+        )
+        early_stop = next(
+            c for c in fxt_configs.callbacks if c.class_path == "lightning.pytorch.callbacks.EarlyStopping"
+        )
+        model_summary = next(
+            c for c in fxt_configs.callbacks if c.class_path == "lightning.pytorch.callbacks.RichModelSummary"
+        )
+        assert iter_timer.init_args.prog_bar
+        assert early_stop.init_args.patience == 10
+        assert model_summary.init_args.max_depth == 1
 
         # Wrong Config overriding
         wrong_override = [
@@ -285,7 +305,10 @@ def test_list_override(fxt_configs) -> None:
             },
         ]
         list_override(fxt_configs, "callbacks", callbacks_override)
-        assert fxt_configs.callbacks[1].init_args.patience == 3
+        early_stop = next(
+            c for c in fxt_configs.callbacks if c.class_path == "lightning.pytorch.callbacks.EarlyStopping"
+        )
+        assert early_stop.init_args.patience == 3
 
         logger_override = [
             {
@@ -294,7 +317,12 @@ def test_list_override(fxt_configs) -> None:
             },
         ]
         list_override(fxt_configs, "logger", logger_override)
-        assert fxt_configs.logger[1].init_args.name == "workspace/"
+        tb_logger = next(
+            lg
+            for lg in fxt_configs.logger
+            if lg.class_path == "lightning.pytorch.loggers.tensorboard.TensorBoardLogger"
+        )
+        assert tb_logger.init_args.name == "workspace/"
 
         new_callbacks_override = [
             {
diff --git a/library/tests/unit/data/augmentation/test_cached_transforms.py b/library/tests/unit/data/augmentation/test_cached_transforms.py
new file mode 100644
index 00000000000..fd179b0a883
--- /dev/null
+++ b/library/tests/unit/data/augmentation/test_cached_transforms.py
@@ -0,0 +1,303 @@
+# Copyright (C) 2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+"""Unit tests for CachedMosaic, CachedMixUp, and RandomIoUCrop transforms."""
+
+from __future__ import annotations
+
+import pytest
+import torch
+from datumaro.experimental.fields import ImageInfo as DmImageInfo
+from torchvision import tv_tensors
+
+from otx.data.augmentation.transforms import CachedMixUp, CachedMosaic, RandomIoUCrop
+from otx.data.entity.sample import InstanceSegmentationSample
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def _make_det_sample(
+    h: int = 64,
+    w: int = 64,
+    n_boxes: int = 2,
+) -> InstanceSegmentationSample:
+    """Create a minimal instance-seg sample for testing (always has masks)."""
+    image = torch.rand(3, h, w)
+    bboxes = torch.zeros(n_boxes, 4, dtype=torch.float32)
+    for i in range(n_boxes):
+        x1, y1 = 5 + i * 10, 5 + i * 10
+        bboxes[i] = torch.tensor([x1, y1, x1 + 20, y1 + 20])
+    labels = torch.arange(n_boxes, dtype=torch.long)
+    masks_t = torch.zeros(n_boxes, h, w, dtype=torch.uint8)
+    for i in range(n_boxes):
+        x1, y1 = int(bboxes[i, 0]), int(bboxes[i, 1])
+        x2, y2 = int(bboxes[i, 2]), int(bboxes[i, 3])
+        masks_t[i, y1:y2, x1:x2] = 1
+
+    return InstanceSegmentationSample(
+        image=tv_tensors.Image(image),
+        dm_image_info=DmImageInfo(height=h, width=w),
+        bboxes=tv_tensors.BoundingBoxes(bboxes, format=tv_tensors.BoundingBoxFormat.XYXY, canvas_size=(h, w)),  # type: ignore[no-matching-overload]
+        label=labels,
+        masks=tv_tensors.Mask(masks_t),
+    )
+
+
+# =====================================================================
+# CachedMosaic Tests
+# =====================================================================
+class TestCachedMosaicInit:
+    """Validation tests for CachedMosaic.__init__."""
+
+    def test_invalid_img_scale_type_raises(self):
+        with pytest.raises(TypeError, match="img_scale must be a tuple or list"):
+            CachedMosaic(img_scale=640)  # type: ignore[arg-type]
+
+    def test_invalid_probability_raises(self):
+        with pytest.raises(ValueError, match="probability must be in"):
+            CachedMosaic(probability=1.5)
+
+    def test_invalid_max_cached_images_raises(self):
+        with pytest.raises(ValueError, match="max_cached_images must be >= 4"):
+            CachedMosaic(max_cached_images=3)
+
+    def test_valid_construction(self):
+        m = CachedMosaic(img_scale=(320, 320), probability=0.5, max_cached_images=10)
+        assert m.img_scale == (320, 320)
+        assert m.prob == 0.5
+        assert m.max_cached_images == 10
+
+
+class TestCachedMosaicForward:
+    """Functional tests for CachedMosaic augmentation."""
+
+    def test_cache_too_small_returns_input(self):
+        """With fewer than 4 cached images, forward returns input unchanged."""
+        mosaic = CachedMosaic(img_scale=(32, 32), probability=1.0, max_cached_images=10)
+        sample = _make_det_sample(h=32, w=32)
+        result = mosaic(sample)
+        # After first call, cache has 1 item → should return input image unchanged
+        assert result.image.shape == sample.image.shape
+
+    def test_mosaic_applied_after_cache_fills(self):
+        """After 4+ samples cached, mosaic produces doubled-size output."""
+        mosaic = CachedMosaic(img_scale=(32, 32), probability=1.0, max_cached_images=40)
+        for _ in range(4):
+            sample = _make_det_sample(h=32, w=32, n_boxes=2)
+            result = mosaic(sample)
+        # After 4 calls, cache is full enough; 5th call should produce mosaic
+        sample = _make_det_sample(h=32, w=32, n_boxes=2)
+        result = mosaic(sample)
+        # Mosaic output should be 2x the img_scale
+        assert result.image.shape[-2:] == (64, 64)
+        # Image values should be in [0, 1]
+        assert result.image.min() >= 0.0
+        assert result.image.max() <= 1.0
+
+    def test_mosaic_with_masks(self):
+        """Mosaic should handle instance segmentation masks."""
+        mosaic = CachedMosaic(img_scale=(32, 32), probability=1.0, max_cached_images=40)
+        for _ in range(5):
+            sample = _make_det_sample(h=32, w=32, n_boxes=2)
+            result = mosaic(sample)
+        # After enough samples, mosaic should produce masks
+        assert result.masks is not None
+        assert result.masks.shape[-2:] == (64, 64)
+
+    def test_probability_zero_returns_input(self):
+        """With probability=0, mosaic never applies (even with full cache)."""
+        mosaic = CachedMosaic(img_scale=(32, 32), probability=0.0, max_cached_images=40)
+        for _ in range(5):
+            sample = _make_det_sample(h=32, w=32)
+            _ = mosaic(sample)
+        sample = _make_det_sample(h=32, w=32)
+        result = mosaic(sample)
+        # prob=0 → always skip → original size preserved
+        assert result.image.shape[-2:] == (32, 32)
+
+    def test_cache_eviction(self):
+        """Cache should not grow beyond max_cached_images."""
+        mosaic = CachedMosaic(img_scale=(32, 32), probability=0.0, max_cached_images=5)
+        for _ in range(10):
+            sample = _make_det_sample(h=32, w=32)
+            mosaic(sample)
+        assert len(mosaic.results_cache) <= 5
+
+    def test_bboxes_valid_after_mosaic(self):
+        """Bboxes should be valid XYXY after mosaic (x2 > x1, y2 > y1)."""
+        mosaic = CachedMosaic(img_scale=(32, 32), probability=1.0, max_cached_images=40)
+        for _ in range(5):
+            sample = _make_det_sample(h=32, w=32, n_boxes=3)
+            result = mosaic(sample)
+        # After mosaic, bboxes should be valid
+        if len(result.bboxes) > 0:
+            widths = result.bboxes[:, 2] - result.bboxes[:, 0]
+            heights = result.bboxes[:, 3] - result.bboxes[:, 1]
+            assert (widths > 0).all()
+            assert (heights > 0).all()
+
+    def test_labels_match_bboxes(self):
+        """Labels should be aligned with bboxes after mosaic."""
+        mosaic = CachedMosaic(img_scale=(32, 32), probability=1.0, max_cached_images=40)
+        for _ in range(5):
+            sample = _make_det_sample(h=32, w=32, n_boxes=2)
+            result = mosaic(sample)
+        assert result.bboxes.shape[0] == result.label.shape[0]
+
+    def test_get_indexes(self):
+        """get_indexes should return 3 random indices within cache bounds."""
+        mosaic = CachedMosaic(img_scale=(32, 32))
+        cache = list(range(10))
+        indices = mosaic.get_indexes(cache)
+        assert len(indices) == 3
+        assert all(0 <= i < 10 for i in indices)
+
+    def test_repr(self):
+        mosaic = CachedMosaic(img_scale=(320, 320), probability=0.8)
+        r = repr(mosaic)
+        assert "CachedMosaic" in r
+        assert "320" in r
+        assert "0.8" in r
+
+
+# =====================================================================
+# CachedMixUp Tests
+# =====================================================================
+class TestCachedMixUpInit:
+    """Validation tests for CachedMixUp.__init__."""
+
+    def test_invalid_img_scale_type_raises(self):
+        with pytest.raises(TypeError, match="img_scale must be a tuple or list"):
+            CachedMixUp(img_scale=640)  # type: ignore[arg-type]
+
+    def test_invalid_max_cached_images_raises(self):
+        with pytest.raises(ValueError, match="Cache size must be >= 2"):
+            CachedMixUp(max_cached_images=1)
+
+    def test_invalid_probability_raises(self):
+        with pytest.raises(ValueError, match="Probability must be in"):
+            CachedMixUp(probability=-0.1)
+
+    def test_valid_construction(self):
+        m = CachedMixUp(img_scale=(320, 320), probability=0.5, max_cached_images=10)
+        assert m.img_scale == (320, 320)
+        assert m.prob == 0.5
+
+
+class TestCachedMixUpForward:
+    """Functional tests for CachedMixUp augmentation."""
+
+    def test_cache_too_small_returns_input(self):
+        """With only 1 cached sample, forward returns input unchanged."""
+        mixup = CachedMixUp(img_scale=(32, 32), probability=1.0, max_cached_images=20)
+        sample = _make_det_sample(h=32, w=32)
+        result = mixup(sample)
+        # First call: cache has 1 item → early return
+        assert result.image.shape == sample.image.shape
+
+    def test_mixup_applied_after_cache_fills(self):
+        """After 2+ samples cached, mixup blends images."""
+        mixup = CachedMixUp(
+            img_scale=(32, 32),
+            probability=1.0,
+            max_cached_images=20,
+            mix_ratio=0.5,
+        )
+        for _ in range(3):
+            sample = _make_det_sample(h=32, w=32, n_boxes=2)
+            result = mixup(sample)
+        # After 3 calls the cache holds enough samples, so mixup should produce a valid result
+        assert result.image.shape[-2:] == (32, 32)
+        # Combined bboxes: at least original count
+        assert result.bboxes.shape[0] >= 2
+
+    def test_probability_zero_returns_input(self):
+        """With probability=0, mixup never applies."""
+        mixup = CachedMixUp(img_scale=(32, 32), probability=0.0, max_cached_images=20)
+        for _ in range(5):
+            sample = _make_det_sample(h=32, w=32, n_boxes=2)
+            result = mixup(sample)
+        # prob=0 → always skip → bboxes should be exactly the input count
+        assert result.bboxes.shape[0] == 2
+
+    def test_cache_eviction(self):
+        """Cache should not grow beyond max_cached_images."""
+        mixup = CachedMixUp(img_scale=(32, 32), probability=0.0, max_cached_images=5)
+        for _ in range(10):
+            mixup(_make_det_sample(h=32, w=32))
+        assert len(mixup.results_cache) <= 5
+
+    def test_labels_match_bboxes(self):
+        """Labels count must match bboxes count after mixup."""
+        mixup = CachedMixUp(img_scale=(32, 32), probability=1.0, max_cached_images=20)
+        for _ in range(5):
+            result = mixup(_make_det_sample(h=32, w=32, n_boxes=2))
+        assert result.bboxes.shape[0] == result.label.shape[0]
+
+    def test_mixup_with_masks(self):
+        """MixUp should handle instance segmentation masks."""
+        mixup = CachedMixUp(img_scale=(32, 32), probability=1.0, max_cached_images=20)
+        for _ in range(5):
+            result = mixup(_make_det_sample(h=32, w=32, n_boxes=2))
+        assert result.masks is not None
+        # Masks count should match bboxes count
+        assert result.masks.shape[0] == result.bboxes.shape[0]
+
+    def test_image_clamped_to_unit(self):
+        """MixUp output image should be clamped to [0, 1]."""
+        mixup = CachedMixUp(img_scale=(32, 32), probability=1.0, max_cached_images=20)
+        for _ in range(5):
+            result = mixup(_make_det_sample(h=32, w=32))
+        assert result.image.min() >= 0.0
+        assert result.image.max() <= 1.0
+
+    def test_repr(self):
+        m = CachedMixUp(img_scale=(640, 640), mix_ratio=0.3)
+        r = repr(m)
+        assert "CachedMixUp" in r
+        assert "640" in r
+        assert "0.3" in r
+
+
+# =====================================================================
+# RandomIoUCrop Tests
+# =====================================================================
+class TestRandomIoUCrop:
+    """Tests for RandomIoUCrop probability gating."""
+
+    def test_probability_zero_passthrough(self):
+        """With probability=0, the input is returned unchanged."""
+        crop = RandomIoUCrop(probability=0.0)
+        image = tv_tensors.Image(torch.rand(3, 100, 100))
+        bboxes = tv_tensors.BoundingBoxes(  # type: ignore[no-matching-overload]
+            torch.tensor([[10.0, 10.0, 50.0, 50.0]]),
+            format=tv_tensors.BoundingBoxFormat.XYXY,
+            canvas_size=(100, 100),
+        )
+        labels = torch.tensor([0])
+        result = crop(image, bboxes, labels)
+        # probability=0 → always skip → returns the inputs as a tuple
+        assert isinstance(result, tuple)
+        assert torch.equal(result[0], image)
+
+    def test_probability_one_applies(self):
+        """With probability=1, crop always applies (output shape may differ)."""
+        crop = RandomIoUCrop(probability=1.0)
+        image = tv_tensors.Image(torch.rand(3, 100, 100))
+        bboxes = tv_tensors.BoundingBoxes(  # type: ignore[no-matching-overload]
+            torch.tensor([[10.0, 10.0, 50.0, 50.0]]),
+            format=tv_tensors.BoundingBoxFormat.XYXY,
+            canvas_size=(100, 100),
+        )
+        labels = torch.tensor([0])
+        # Should not raise
+        result = crop(image, bboxes, labels)
+        assert result is not None
+
+    def test_single_input_returns_single(self):
+        """With probability=0 and a single input, returns the input itself (not a tuple)."""
+        crop = RandomIoUCrop(probability=0.0)
+        image = tv_tensors.Image(torch.rand(3, 50, 50))
+        result = crop(image)
+        # Single input + skip → returns single tensor
+        assert isinstance(result, torch.Tensor)
diff --git a/library/tests/unit/data/augmentation/test_intensity.py b/library/tests/unit/data/augmentation/test_intensity.py
new file mode 100644
index 00000000000..7ff252efbed
--- /dev/null
+++ b/library/tests/unit/data/augmentation/test_intensity.py
@@ -0,0 +1,276 @@
+# Copyright (C) 2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""Unit tests for intensity mapping transforms."""
+
+from __future__ import annotations
+
+import pytest
+import torch
+
+from otx.config.data import IntensityConfig
+from otx.data.augmentation.intensity import (
+    PercentileClip,
+    RangeScale,
+    RepeatChannels,
+    ScaleToUnit,
+    WindowLevel,
+    build_intensity_transform,
+)
+
+
+class TestScaleToUnit:
+    def test_uint8_scale(self):
+        t = ScaleToUnit(max_value=255.0)
+        x = torch.tensor([0, 128, 255], dtype=torch.uint8)
+        out = t(x)
+        assert out.dtype == torch.float32
+        assert torch.allclose(out, torch.tensor([0.0, 128.0 / 255.0, 1.0]))
+
+    def test_uint16_scale(self):
+        t = ScaleToUnit(max_value=65535.0)
+        x = torch.tensor([0, 32768, 65535], dtype=torch.int32)  # int32 for uint16 data
+        out = t(x)
+        assert out.dtype == torch.float32
+        assert torch.isclose(out[0], torch.tensor(0.0))
+        assert torch.isclose(out[2], torch.tensor(1.0))
+        assert 0.0 <= out[1].item() <= 1.0
+
+    def test_clamps_above_max(self):
+        t = ScaleToUnit(max_value=100.0)
+        x = torch.tensor([200.0])
+        out = t(x)
+        assert out.item() == 1.0
+
+    def test_3d_image(self):
+        t = ScaleToUnit(max_value=255.0)
+        x = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)
+        out = t(x)
+        assert out.shape == (3, 64, 64)
+        assert out.dtype == torch.float32
+        assert out.min() >= 0.0
+        assert out.max() <= 1.0
+
+
+class TestWindowLevel:
+    def test_basic_window(self):
+        t = WindowLevel(center=100.0, width=200.0)
+        x = torch.tensor([0.0, 100.0, 200.0, -50.0, 300.0])
+        out = t(x)
+        assert torch.isclose(out[0], torch.tensor(0.0))
+        assert torch.isclose(out[1], torch.tensor(0.5))
+        assert torch.isclose(out[2], torch.tensor(1.0))
+        assert out[3].item() == 0.0  # clamped
+        assert out[4].item() == 1.0  # clamped
+
+    def test_ct_window(self):
+        """Typical CT brain window: center=40, width=80 → [0, 80]."""
+        t = WindowLevel(center=40.0, width=80.0)
+        x = torch.tensor([0.0, 40.0, 80.0])
+        out = t(x)
+        assert torch.isclose(out[0], torch.tensor(0.0))
+        assert torch.isclose(out[1], torch.tensor(0.5))
+        assert torch.isclose(out[2], torch.tensor(1.0))
+
+
+class TestPercentileClip:
+    def test_uniform_image(self):
+        t = PercentileClip(low=1.0, high=99.0)
+        x = torch.arange(0, 1000, dtype=torch.float32)
+        out = t(x)
+        assert out.dtype == torch.float32
+        assert out.min() >= 0.0
+        assert out.max() <= 1.0
+
+    def test_constant_image_returns_zeros(self):
+        t = PercentileClip(low=1.0, high=99.0)
+        x = torch.full((3, 32, 32), 42.0)
+        out = t(x)
+        assert torch.all(out == 0.0)
+
+    def test_invalid_percentiles(self):
+        with pytest.raises(ValueError, match="Percentiles must satisfy"):
+            PercentileClip(low=99.0, high=1.0)
+
+    def test_3d_image(self):
+        t = PercentileClip(low=5.0, high=95.0)
+        x = torch.randint(0, 65536, (1, 64, 64), dtype=torch.int32).float()
+        out = t(x)
+        assert out.shape == (1, 64, 64)
+        assert out.min() >= 0.0
+        assert out.max() <= 1.0
+
+
+class TestRangeScale:
+    def test_thermal_conversion(self):
+        """Reproduce process_raw_thermal.py: raw * 0.4, clip [295.15, 360.15], normalize."""
+        t = RangeScale(scale_factor=0.4, min_value=295.15, max_value=360.15)
+        # A pixel with raw value 900 → 900 * 0.4 = 360.0 → in range → (360.0-295.15)/(360.15-295.15)
+        x = torch.tensor([900.0])
+        out = t(x)
+        expected = (900.0 * 0.4 - 295.15) / (360.15 - 295.15)
+        assert torch.isclose(out, torch.tensor(expected))
+
+    def test_thermal_clamping_low(self):
+        t = RangeScale(scale_factor=0.4, min_value=295.15, max_value=360.15)
+        # 100 * 0.4 = 40 < 295.15 → clipped to 295.15 → 0.0
+        x = torch.tensor([100.0])
+        out = t(x)
+        assert torch.isclose(out, torch.tensor(0.0))
+
+    def test_thermal_clamping_high(self):
+        t = RangeScale(scale_factor=0.4, min_value=295.15, max_value=360.15)
+        # 60000 * 0.4 = 24000 > 360.15 → clipped to 360.15 → 1.0
+        x = torch.tensor([60000.0])
+        out = t(x)
+        assert torch.isclose(out, torch.tensor(1.0))
+
+    def test_int32_input(self):
+        """Simulates raw uint16 data stored as int32 tensor."""
+        t = RangeScale(scale_factor=0.4, min_value=295.15, max_value=360.15)
+        x = torch.tensor([900], dtype=torch.int32)
+        out = t(x)
+        assert out.dtype == torch.float32
+        assert 0.0 <= out.item() <= 1.0
+
+    def test_invalid_range(self):
+        with pytest.raises(ValueError, match="max_value must be > min_value"):
+            RangeScale(scale_factor=1.0, min_value=100.0, max_value=50.0)
+
+    def test_batch_3d(self):
+        t = RangeScale(scale_factor=0.4, min_value=295.15, max_value=360.15)
+        x = torch.randint(700, 1000, (1, 32, 32), dtype=torch.int32)
+        out = t(x)
+        assert out.shape == (1, 32, 32)
+        assert out.dtype == torch.float32
+
+
+class TestRepeatChannels:
+    def test_single_to_three(self):
+        t = RepeatChannels(num_channels=3)
+        x = torch.randn(1, 64, 64)
+        out = t(x)
+        assert out.shape == (3, 64, 64)
+        # All channels should be identical
+        assert torch.equal(out[0], out[1])
+        assert torch.equal(out[0], out[2])
+
+    def test_three_channel_passthrough(self):
+        t = RepeatChannels(num_channels=3)
+        x = torch.randn(3, 64, 64)
+        out = t(x)
+        assert torch.equal(x, out)
+
+    def test_batched_4d(self):
+        t = RepeatChannels(num_channels=3)
+        x = torch.randn(4, 1, 64, 64)
+        out = t(x)
+        assert out.shape == (4, 3, 64, 64)
+
+    def test_batched_3ch_passthrough(self):
+        t = RepeatChannels(num_channels=3)
+        x = torch.randn(4, 3, 64, 64)
+        out = t(x)
+        assert torch.equal(x, out)
+
+
+class TestBuildIntensityTransform:
+    def test_default_uint8(self):
+        cfg = IntensityConfig()
+        pipeline = build_intensity_transform(cfg)
+        # Should contain exactly one step: ScaleToUnit(255)
+        assert len(pipeline) == 1
+        assert isinstance(pipeline[0], ScaleToUnit)
+        assert pipeline[0].max_value == 255.0
+
+    def test_uint16_scale_to_unit(self):
+        cfg = IntensityConfig(storage_dtype="uint16")
+        pipeline = build_intensity_transform(cfg)
+        assert isinstance(pipeline[0], ScaleToUnit)
+        assert pipeline[0].max_value == 65535.0
+
+    def test_window_mode(self):
+        cfg = IntensityConfig(
+            storage_dtype="uint16",
+            mode="window",
+            window_center=40.0,
+            window_width=80.0,
+        )
+        pipeline = build_intensity_transform(cfg)
+        assert isinstance(pipeline[0], WindowLevel)
+
+    def test_window_mode_missing_params(self):
+        cfg = IntensityConfig(mode="window")
+        with pytest.raises(ValueError, match="window_center and window_width"):
+            build_intensity_transform(cfg)
+
+    def test_percentile_mode(self):
+        cfg = IntensityConfig(mode="percentile", percentile_low=2.0, percentile_high=98.0)
+        pipeline = build_intensity_transform(cfg)
+        assert isinstance(pipeline[0], PercentileClip)
+        assert pipeline[0].low == 2.0
+        assert pipeline[0].high == 98.0
+
+    def test_range_scale_mode(self):
+        cfg = IntensityConfig(
+            storage_dtype="uint16",
+            mode="range_scale",
+            scale_factor=0.4,
+            min_value=295.15,
+            max_value=360.15,
+        )
+        pipeline = build_intensity_transform(cfg)
+        assert isinstance(pipeline[0], RangeScale)
+
+    def test_range_scale_missing_max(self):
+        cfg = IntensityConfig(mode="range_scale")
+        with pytest.raises(ValueError, match="requires max_value"):
+            build_intensity_transform(cfg)
+
+    def test_unknown_mode(self):
+        cfg = IntensityConfig(mode="foobar")
+        with pytest.raises(ValueError, match="Unknown IntensityConfig mode"):
+            build_intensity_transform(cfg)
+
+    def test_repeat_channels(self):
+        cfg = IntensityConfig(storage_dtype="uint16", repeat_channels=3)
+        pipeline = build_intensity_transform(cfg)
+        assert len(pipeline) == 2
+        assert isinstance(pipeline[0], ScaleToUnit)
+        assert isinstance(pipeline[1], RepeatChannels)
+
+    def test_thermal_e2e(self):
+        """End-to-end test matching process_raw_thermal.py behavior."""
+        cfg = IntensityConfig(
+            storage_dtype="uint16",
+            mode="range_scale",
+            scale_factor=0.4,
+            min_value=295.15,
+            max_value=360.15,
+            repeat_channels=3,
+        )
+        pipeline = build_intensity_transform(cfg)
+
+        # Simulate a 1-channel thermal image with raw uint16 values (as int32)
+        raw_image = torch.randint(700, 1000, (1, 64, 64), dtype=torch.int32)
+        out = pipeline(raw_image)
+
+        assert out.dtype == torch.float32
+        assert out.shape == (3, 64, 64)
+        assert out.min() >= 0.0
+        assert out.max() <= 1.0
+
+    def test_uint8_e2e_matches_old_behavior(self):
+        """Default uint8 path should produce the same results as to_dtype(float32, scale=True)."""
+        cfg = IntensityConfig()  # defaults
+        pipeline = build_intensity_transform(cfg)
+
+        x = torch.tensor([0, 128, 255], dtype=torch.uint8)
+        new_result = pipeline(x)
+
+        # Old behavior: to_dtype divides by 255
+        import torchvision.transforms.v2.functional as f
+
+        old_result = f.to_dtype(x, dtype=torch.float32, scale=True)
+
+        assert torch.allclose(new_result, old_result, atol=1e-6)
diff --git a/library/tests/unit/data/augmentation/test_kernels.py b/library/tests/unit/data/augmentation/test_kernels.py
new file mode 100644
index 00000000000..dc9faf41938
--- /dev/null
+++ b/library/tests/unit/data/augmentation/test_kernels.py
@@ -0,0 +1,111 @@
+# Copyright (C) 2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+"""Unit tests for torchvision kernel registrations on ImageInfo."""
+
+from __future__ import annotations
+
+import pytest
+
+from otx.data.augmentation.kernels import (
+    _center_crop_image_info,
+    _crop_image_info,
+    _pad_image_info,
+    _resize_image_info,
+    _resized_crop_image_info,
+)
+from otx.data.entity.base import ImageInfo
+
+
+def _make_img_info(h: int = 100, w: int = 200) -> ImageInfo:
+    """Create an ImageInfo (img_idx=0) whose current and original shapes are both (h, w)."""
+    return ImageInfo(img_idx=0, img_shape=(h, w), ori_shape=(h, w))  # type: ignore[no-matching-overload]
+
+
+class TestResizeImageInfo:
+    """Tests for the _resize_image_info kernel (img_shape and scale_factor bookkeeping)."""
+
+    def test_resize_two_element_size(self):
+        info = _make_img_info(100, 200)
+        result = _resize_image_info(info, [50, 100])
+        assert result.img_shape == (50, 100)
+        assert result.scale_factor == pytest.approx((0.5, 0.5))  # both axes are halved
+
+    def test_resize_single_element_size(self):
+        info = _make_img_info(100, 200)
+        result = _resize_image_info(info, [64])  # a single value is expected to give a square shape
+        assert result.img_shape == (64, 64)
+
+    def test_resize_invalid_size_raises(self):
+        info = _make_img_info()
+        with pytest.raises(ValueError, match=r"\[1, 2, 3\]"):  # message must echo the rejected size
+            _resize_image_info(info, [1, 2, 3])
+
+    def test_resize_scale_factor(self):
+        info = _make_img_info(100, 200)
+        _resize_image_info(info, [200, 400])  # return value unused: the assert relies on in-place mutation
+        assert info.scale_factor == pytest.approx((2.0, 2.0))
+
+
+class TestCropImageInfo:
+    """Tests for the _crop_image_info kernel."""
+
+    def test_crop_updates_shape(self):
+        info = _make_img_info(100, 200)
+        result = _crop_image_info(info, height=50, width=80)
+        assert result.img_shape == (50, 80)  # (height, width) of the crop
+
+    def test_crop_clears_scale_factor(self):
+        info = _make_img_info(100, 200)
+        _crop_image_info(info, height=50, width=80)  # return value unused: relies on in-place mutation
+        assert info.scale_factor is None
+
+
+class TestResizedCropImageInfo:
+    """Tests for the _resized_crop_image_info kernel."""
+
+    def test_resized_crop_two_element(self):
+        info = _make_img_info()
+        result = _resized_crop_image_info(info, [128, 256])
+        assert result.img_shape == (128, 256)
+        assert result.scale_factor is None  # no scale factor is reported after a resized crop
+
+    def test_resized_crop_one_element(self):
+        info = _make_img_info()
+        result = _resized_crop_image_info(info, [64])  # a single value is expected to give a square shape
+        assert result.img_shape == (64, 64)
+
+    def test_resized_crop_invalid_raises(self):
+        info = _make_img_info()
+        with pytest.raises(ValueError, match=r"\[1, 2, 3\]"):  # message must echo the rejected size
+            _resized_crop_image_info(info, [1, 2, 3])
+
+
+class TestCenterCropImageInfo:
+    """Tests for the _center_crop_image_info kernel."""
+
+    def test_center_crop(self):
+        info = _make_img_info(100, 200)
+        result = _center_crop_image_info(info, output_size=[50, 80])
+        assert result.img_shape == (50, 80)  # img_shape takes the requested output_size
+        assert result.scale_factor is None  # no scale factor is reported after a crop
+
+
+class TestPadImageInfo:
+    """Tests for the _pad_image_info kernel; all cases start from a 100x200 (H, W) image."""
+
+    def test_pad_int(self):
+        info = _make_img_info(100, 200)
+        result = _pad_image_info(info, padding=10)
+        # int padding → all sides = 10
+        assert result.img_shape == (120, 220)
+
+    def test_pad_list_two(self):
+        info = _make_img_info(100, 200)
+        result = _pad_image_info(info, padding=[5, 10])  # 5 on left/right, 10 on top/bottom
+        assert result.img_shape == (120, 210)
+
+    def test_pad_list_four(self):
+        info = _make_img_info(100, 200)
+        result = _pad_image_info(info, padding=[1, 2, 3, 4])  # torchvision order: left, top, right, bottom
+        assert result.img_shape == (106, 204)
+        assert result.padding == (1, 2, 3, 4)
diff --git a/library/tests/unit/data/augmentation/test_pipeline.py b/library/tests/unit/data/augmentation/test_pipeline.py
new file mode 100644
index 00000000000..dbdb2f8c22b
--- /dev/null
+++ b/library/tests/unit/data/augmentation/test_pipeline.py
@@ -0,0 +1,1051 @@
+# Copyright (C) 2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""Unit tests for CPU and GPU augmentation pipelines and the hybrid approach."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+from unittest.mock import MagicMock
+
+import kornia.augmentation as kornia_aug
+import pytest
+import torch
+import torchvision.transforms.v2 as tvt_v2
+from torch import nn
+from torchvision import tv_tensors
+
+from otx.config.data import IntensityConfig, SubsetConfig
+from otx.data.augmentation.pipeline import (
+    CPUAugmentationPipeline,
+    GPUAugmentationPipeline,
+    _configure_input_size,
+    _eval_input_size_str,
+    _IntensityAdapter,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_image(h: int = 64, w: int = 64, c: int = 3, dtype: torch.dtype = torch.uint8) -> torch.Tensor:
+    """Build a random (C, H, W) image; dtypes other than uint8/int32 yield torch.rand floats in [0, 1)."""
+    shape = (c, h, w)
+    if dtype == torch.uint8:
+        return torch.randint(0, 256, shape, dtype=torch.uint8)
+    if dtype == torch.int32:
+        # Values span the uint16 range but are held in an int32 tensor.
+        return torch.randint(0, 65536, shape, dtype=torch.int32)
+    return torch.rand(shape)
+
+
+def _make_batched_images(batch_size: int = 2, c: int = 3, h: int = 32, w: int = 32) -> torch.Tensor:
+    """Create batched float images (B, C, H, W) drawn uniformly from [0, 1) via torch.rand."""
+    return torch.rand(batch_size, c, h, w)
+
+
+@dataclass
+class _SimpleSample:
+    """Minimal sample for testing CPU pipeline forward."""
+
+    image: torch.Tensor
+    bboxes: tv_tensors.BoundingBoxes | None = None
+    masks: torch.Tensor | None = None
+    label: torch.Tensor | None = None
+    img_info: Any = None
+
+
+def _make_sample(
+    h: int = 64,
+    w: int = 64,
+    dtype: torch.dtype = torch.uint8,
+    with_bboxes: bool = False,
+) -> _SimpleSample:
+    """Return a random-image sample, optionally carrying one XYXY box and a class-0 label."""
+    sample = _SimpleSample(image=_make_image(h, w, dtype=dtype))
+    if not with_bboxes:
+        # bboxes and label keep their None defaults.
+        return sample
+    sample.label = torch.tensor([0])
+    sample.bboxes = tv_tensors.BoundingBoxes(  # type: ignore[call-overload]
+        torch.tensor([[10.0, 10.0, 30.0, 30.0]]),
+        format=tv_tensors.BoundingBoxFormat.XYXY,
+        canvas_size=(h, w),
+    )
+    return sample
+
+
+# ===================================================================
+# CPUAugmentationPipeline tests
+# ===================================================================
+
+
+class TestCPUAugmentationPipelineInit:
+    """Tests for CPUAugmentationPipeline construction."""
+
+    def test_empty_pipeline(self):
+        pipeline = CPUAugmentationPipeline()
+        assert len(pipeline.augmentations) == 0
+
+    def test_pipeline_with_transforms(self):
+        transforms: list[nn.Module] = [tvt_v2.RandomHorizontalFlip(p=0.5), tvt_v2.ToDtype(torch.float32, scale=True)]
+        pipeline = CPUAugmentationPipeline(transforms)
+        assert len(pipeline.augmentations) == 2
+
+    def test_pipeline_is_nn_module(self):
+        pipeline = CPUAugmentationPipeline()
+        assert isinstance(pipeline, nn.Module)
+
+    def test_augmentations_are_module_list(self):
+        transforms: list[nn.Module] = [tvt_v2.RandomHorizontalFlip(p=0.5)]
+        pipeline = CPUAugmentationPipeline(transforms)
+        assert isinstance(pipeline.augmentations, nn.ModuleList)
+
+
+class TestCPUAugmentationPipelineListAvailable:
+    """Tests for list_available_transforms."""
+
+    def test_returns_list(self):
+        result = CPUAugmentationPipeline.list_available_transforms()
+        assert isinstance(result, list)
+        assert len(result) > 0
+
+    def test_all_are_transform_subclasses(self):
+        result = CPUAugmentationPipeline.list_available_transforms()
+        for cls in result:
+            assert issubclass(cls, tvt_v2.Transform)
+
+
+class TestCPUAugmentationPipelineDispatchTransform:
+    """Tests for _dispatch_transform."""
+
+    def test_dict_config(self):
+        cfg = {
+            "class_path": "torchvision.transforms.v2.RandomHorizontalFlip",
+            "init_args": {"p": 0.5},
+        }
+        result = CPUAugmentationPipeline._dispatch_transform(cfg)
+        assert isinstance(result, tvt_v2.RandomHorizontalFlip)
+
+    def test_already_instantiated(self):
+        transform = tvt_v2.RandomHorizontalFlip(p=0.5)
+        result = CPUAugmentationPipeline._dispatch_transform(transform)
+        assert result is transform
+
+    def test_unsupported_type_raises(self):
+        with pytest.raises(TypeError, match="CPUAugmentationPipeline accepts only"):
+            CPUAugmentationPipeline._dispatch_transform("bad_value")  # type: ignore[arg-type]
+
+
+class TestCPUAugmentationPipelineFromConfig:
+    """Tests for from_config."""
+
+    def test_empty_config(self):
+        """With default IntensityConfig, an empty augmentations_cpu still gets the intensity transform."""
+        config = SubsetConfig(augmentations_cpu=[], input_size=None)
+        pipeline = CPUAugmentationPipeline.from_config(config)
+        # Default IntensityConfig(mode='scale_to_unit') always prepends an intensity transform
+        assert len(pipeline.augmentations) == 1
+        assert isinstance(pipeline.augmentations[0], _IntensityAdapter)
+
+    def test_empty_config_no_intensity(self):
+        """With intensity=None, no transforms are created."""
+        config = SubsetConfig(augmentations_cpu=[], input_size=None)
+        config.intensity = None  # type: ignore[assignment]
+        pipeline = CPUAugmentationPipeline.from_config(config)
+        assert len(pipeline.augmentations) == 0
+
+    def test_with_augmentations(self):
+        config = SubsetConfig(
+            augmentations_cpu=[
+                {
+                    "class_path": "torchvision.transforms.v2.RandomHorizontalFlip",
+                    "init_args": {"p": 0.5},
+                },
+            ],
+            input_size=None,
+        )
+        pipeline = CPUAugmentationPipeline.from_config(config)
+        # 1 intensity (default) + 1 user augmentation
+        assert len(pipeline.augmentations) == 2
+        assert isinstance(pipeline.augmentations[0], _IntensityAdapter)
+        assert isinstance(pipeline.augmentations[1], tvt_v2.RandomHorizontalFlip)
+
+    def test_nn_module_passthrough(self):
+        """Pre-instantiated nn.Module should be passed through directly."""
+        flip = tvt_v2.RandomHorizontalFlip(p=1.0)
+        config = SubsetConfig(augmentations_cpu=[flip], input_size=None)  # type: ignore[arg-type]
+        pipeline = CPUAugmentationPipeline.from_config(config)
+        # 1 intensity (default) + 1 nn.Module passthrough
+        assert len(pipeline.augmentations) == 2
+        assert isinstance(pipeline.augmentations[1], tvt_v2.RandomHorizontalFlip)
+
+    def test_unsupported_config_type_raises(self):
+        config = SubsetConfig(augmentations_cpu=["bad_value"], input_size=None)  # type: ignore[arg-type]
+        with pytest.raises(TypeError, match="Unsupported augmentation config type"):
+            CPUAugmentationPipeline.from_config(config)
+
+    def test_intensity_config_prepended(self):
+        """IntensityConfig should add an intensity transform as the first augmentation."""
+        config = SubsetConfig(
+            augmentations_cpu=[
+                {
+                    "class_path": "torchvision.transforms.v2.RandomHorizontalFlip",
+                    "init_args": {"p": 0.5},
+                },
+            ],
+            intensity=IntensityConfig(storage_dtype="uint8", mode="scale_to_unit"),
+            input_size=None,
+        )
+        pipeline = CPUAugmentationPipeline.from_config(config)
+        # intensity transform (wrapped) + 1 user augmentation
+        assert len(pipeline.augmentations) == 2
+        # First transform should be an _IntensityAdapter wrapping ScaleToUnit
+        from otx.data.augmentation.intensity import ScaleToUnit
+
+        assert isinstance(pipeline.augmentations[0], _IntensityAdapter)
+        # The inner nn.Sequential should contain ScaleToUnit
+        inner = pipeline.augmentations[0].transform
+        assert isinstance(inner[0], ScaleToUnit)  # type: ignore[bad-index]
+
+    def test_intensity_config_none_no_prepend(self):
+        """No intensity config → no prepended transform."""
+        config = SubsetConfig(
+            augmentations_cpu=[
+                {
+                    "class_path": "torchvision.transforms.v2.RandomHorizontalFlip",
+                    "init_args": {"p": 0.5},
+                },
+            ],
+            input_size=None,
+        )
+        # Manually set intensity to None to simulate legacy config
+        config.intensity = None  # type: ignore[assignment]
+        pipeline = CPUAugmentationPipeline.from_config(config)
+        assert len(pipeline.augmentations) == 1
+
+    def test_intensity_uint16_range_scale(self):
+        """IntensityConfig with range_scale mode should prepend RangeScale."""
+        config = SubsetConfig(
+            augmentations_cpu=[],
+            intensity=IntensityConfig(
+                storage_dtype="uint16",
+                mode="range_scale",
+                scale_factor=0.4,
+                min_value=295.15,
+                max_value=360.15,
+            ),
+            input_size=None,
+        )
+        pipeline = CPUAugmentationPipeline.from_config(config)
+        from otx.data.augmentation.intensity import RangeScale
+
+        assert len(pipeline.augmentations) == 1
+        assert isinstance(pipeline.augmentations[0], _IntensityAdapter)
+        assert isinstance(pipeline.augmentations[0].transform[0], RangeScale)  # type: ignore[bad-index]
+
+
+class TestCPUAugmentationPipelineInputSize:
+    """Tests for _configure_input_size and _eval_input_size_str."""
+
+    def test_eval_simple_tuple(self):
+        result = _eval_input_size_str("(224, 224)")
+        assert result == (224, 224)
+
+    def test_eval_multiplication(self):
+        result = _eval_input_size_str("(224, 224) * 2")
+        assert result == (448, 448)
+
+    def test_eval_division(self):
+        result = _eval_input_size_str("(400, 400) / 2")
+        assert result == (200, 200)
+
+    def test_eval_int(self):
+        result = _eval_input_size_str("224")
+        assert result == 224
+
+    def test_eval_negative(self):
+        result = _eval_input_size_str("-1")
+        assert result == -1
+
+    def test_eval_bad_syntax_raises(self):
+        """Addition is not supported in the safe eval — should raise SyntaxError."""
+        with pytest.raises(SyntaxError, match="Bad syntax"):
+            _eval_input_size_str("(224, 224) + 1")
+
+    def test_configure_input_size_no_placeholder(self):
+        cfg = {
+            "class_path": "torchvision.transforms.v2.Resize",
+            "init_args": {"size": [224, 224]},
+        }
+        result = _configure_input_size(cfg, (320, 320))
+        assert result["init_args"]["size"] == [224, 224]
+
+    def test_configure_input_size_missing_raises(self):
+        cfg = {
+            "class_path": "torchvision.transforms.v2.Resize",
+            "init_args": {"size": "$(input_size)"},
+        }
+        with pytest.raises(RuntimeError, match="input_size is set to None"):
+            _configure_input_size(cfg, None)
+
+    def test_configure_input_size_no_init_args(self):
+        cfg = {"class_path": "torchvision.transforms.v2.RandomHorizontalFlip"}
+        result = _configure_input_size(cfg, (224, 224))
+        assert result == cfg
+
+
+class TestCPUAugmentationPipelineForward:
+    """Tests for the CPUAugmentationPipeline forward pass and its native-transform helpers."""
+
+    def test_empty_pipeline_passthrough(self):
+        pipeline = CPUAugmentationPipeline()
+        sample = _make_sample()
+        result = pipeline(sample)
+        assert result is sample
+
+    def test_native_torchvision_transform_applied(self):
+        """Native torchvision transform should modify the image."""
+        pipeline = CPUAugmentationPipeline([tvt_v2.ToDtype(torch.float32, scale=True)])
+        sample = _make_sample(dtype=torch.uint8)
+        result = pipeline(sample)
+        assert result.image.dtype == torch.float32
+        assert result.image.max() <= 1.0
+
+    def test_custom_transform_called(self):
+        """A plain nn.Module (non-torchvision) transform should be called with the whole sample."""
+
+        class _RescaleTransform(nn.Module):
+            def forward(self, sample):  # noqa: ANN202
+                sample.image = sample.image.float() / 255.0
+                return sample
+
+        pipeline = CPUAugmentationPipeline([_RescaleTransform()])
+        sample = _make_sample(dtype=torch.uint8)
+        result = pipeline(sample)
+        assert result.image.dtype == torch.float32
+
+    def test_none_return_propagates(self):
+        """If a transform returns None, forward should return None."""
+
+        class _NoneTransform(nn.Module):
+            def forward(self, sample) -> None:
+                return None
+
+        pipeline = CPUAugmentationPipeline([_NoneTransform()])
+        sample = _make_sample()
+        result = pipeline(sample)
+        assert result is None
+
+    def test_native_transform_detection(self):
+        """Check _is_native_torchvision_transform correctly identifies transforms."""
+        pipeline = CPUAugmentationPipeline()
+        flip = tvt_v2.RandomHorizontalFlip(p=0.5)
+        assert pipeline._is_native_torchvision_transform(flip)
+
+        custom = nn.Identity()
+        assert not pipeline._is_native_torchvision_transform(custom)
+
+    def test_apply_native_transform_image_only(self):
+        """_apply_native_transform with image-only sample."""
+        pipeline = CPUAugmentationPipeline()
+        sample = _SimpleSample(image=_make_image(32, 32, dtype=torch.uint8))
+        transform = tvt_v2.ToDtype(torch.float32, scale=True)
+        result = pipeline._apply_native_transform(transform, sample)  # type: ignore[arg-type]
+        assert result.image.dtype == torch.float32
+
+    def test_apply_native_transform_with_bboxes(self):
+        """_apply_native_transform preserves bboxes through spatial transforms."""
+        pipeline = CPUAugmentationPipeline()
+        sample = _make_sample(64, 64, with_bboxes=True)
+        # Use a transform that should keep image size the same
+        transform = tvt_v2.RandomHorizontalFlip(p=1.0)  # Always flip
+        result = pipeline._apply_native_transform(transform, sample)  # type: ignore[arg-type]
+        assert result.bboxes is not None  # type: ignore[union-attr]
+
+    def test_apply_native_transform_empty_sample(self):
+        """Sample with no transformable fields returns unchanged."""
+        pipeline = CPUAugmentationPipeline()
+        sample = _SimpleSample(image=None)  # type: ignore[arg-type]
+        transform = tvt_v2.RandomHorizontalFlip(p=0.5)
+        result = pipeline._apply_native_transform(transform, sample)  # type: ignore[arg-type]
+        assert result is sample
+
+    def test_repr(self):
+        transforms: list[nn.Module] = [tvt_v2.RandomHorizontalFlip(p=0.5)]
+        pipeline = CPUAugmentationPipeline(transforms)
+        r = repr(pipeline)
+        assert "CPUAugmentationPipeline" in r
+        assert "RandomHorizontalFlip" in r
+
+
+class TestCPUAugmentationPipelineIntensityIntegration:
+    """Tests that intensity transforms integrate correctly into the CPU pipeline."""
+
+    def test_scale_to_unit_in_pipeline(self):
+        """uint8 scale_to_unit through a full pipeline."""
+        config = SubsetConfig(
+            augmentations_cpu=[],
+            intensity=IntensityConfig(storage_dtype="uint8", mode="scale_to_unit"),
+            input_size=None,
+        )
+        pipeline = CPUAugmentationPipeline.from_config(config)
+        sample = _make_sample(32, 32, dtype=torch.uint8)
+        result = pipeline(sample)
+        assert result.image.dtype == torch.float32
+        assert result.image.min() >= 0.0
+        assert result.image.max() <= 1.0
+
+    def test_uint16_scale_to_unit_in_pipeline(self):
+        """uint16 data with scale_to_unit mode."""
+        config = SubsetConfig(
+            augmentations_cpu=[],
+            intensity=IntensityConfig(
+                storage_dtype="uint16",
+                mode="scale_to_unit",
+                max_value=65535.0,
+            ),
+            input_size=None,
+        )
+        pipeline = CPUAugmentationPipeline.from_config(config)
+        sample = _SimpleSample(image=torch.randint(0, 65536, (3, 32, 32), dtype=torch.int32))
+        result = pipeline(sample)
+        assert result.image.dtype == torch.float32
+        assert result.image.min() >= 0.0
+        assert result.image.max() <= 1.0
+
+    def test_intensity_then_augmentation(self):
+        """Intensity transform followed by spatial augmentation."""
+        config = SubsetConfig(
+            augmentations_cpu=[
+                {
+                    "class_path": "torchvision.transforms.v2.Resize",
+                    "init_args": {"size": [16, 16]},
+                },
+            ],
+            intensity=IntensityConfig(storage_dtype="uint8", mode="scale_to_unit"),
+            input_size=None,
+        )
+        pipeline = CPUAugmentationPipeline.from_config(config)
+        sample = _make_sample(64, 64, dtype=torch.uint8)
+        result = pipeline(sample)
+        assert result.image.shape == (3, 16, 16)
+        assert result.image.dtype == torch.float32
+
+    def test_repeat_channels_in_pipeline(self):
+        """Intensity config with repeat_channels should replicate single-channel to 3."""
+        config = SubsetConfig(
+            augmentations_cpu=[],
+            intensity=IntensityConfig(
+                storage_dtype="uint16",
+                mode="scale_to_unit",
+                max_value=65535.0,
+                repeat_channels=3,
+            ),
+            input_size=None,
+        )
+        pipeline = CPUAugmentationPipeline.from_config(config)
+        # Single-channel uint16 image
+        sample = _SimpleSample(image=torch.randint(0, 65536, (1, 32, 32), dtype=torch.int32))
+        result = pipeline(sample)
+        assert result.image.shape == (3, 32, 32)
+        assert result.image.dtype == torch.float32
+
+
+# ===================================================================
+# GPUAugmentationPipeline tests
+# ===================================================================
+
+
+class TestGPUAugmentationPipelineInit:
+    """Tests for GPUAugmentationPipeline construction."""
+
+    def test_empty_pipeline(self):
+        pipeline = GPUAugmentationPipeline()
+        assert pipeline.aug_sequential is None
+        assert pipeline.mean is None
+        assert pipeline.std is None
+
+    def test_pipeline_with_augmentations(self):
+        augs: list[nn.Module] = [kornia_aug.RandomHorizontalFlip(p=0.5)]
+        pipeline = GPUAugmentationPipeline(augs)
+        assert pipeline.aug_sequential is not None
+
+    def test_pipeline_is_nn_module(self):
+        pipeline = GPUAugmentationPipeline()
+        assert isinstance(pipeline, nn.Module)
+
+    def test_data_keys_default(self):
+        pipeline = GPUAugmentationPipeline()
+        assert pipeline.data_keys == ["input"]
+
+    def test_custom_data_keys(self):
+        pipeline = GPUAugmentationPipeline([], data_keys=["input", "bbox_xyxy", "mask"])
+        assert pipeline.data_keys == ["input", "bbox_xyxy", "mask"]
+
+
+class TestGPUAugmentationPipelineNormalization:
+    """Tests for normalization parameter extraction."""
+
+    def test_extract_norm_from_kornia_normalize(self):
+        mean = torch.tensor([0.485, 0.456, 0.406])
+        std = torch.tensor([0.229, 0.224, 0.225])
+        augs: list[nn.Module] = [kornia_aug.Normalize(mean=mean, std=std)]
+        pipeline = GPUAugmentationPipeline(augs)
+        assert pipeline.mean is not None
+        assert pipeline.std is not None
+        assert len(pipeline.mean) == 3
+        assert len(pipeline.std) == 3
+        assert abs(pipeline.mean[0] - 0.485) < 1e-4
+
+    def test_no_normalize_returns_none(self):
+        augs: list[nn.Module] = [kornia_aug.RandomHorizontalFlip(p=0.5)]
+        pipeline = GPUAugmentationPipeline(augs)
+        assert pipeline.mean is None
+        assert pipeline.std is None
+
+    def test_extract_norm_among_others(self):
+        """Normalization params found even when mixed with other augs."""
+        mean = torch.tensor([0.5, 0.5, 0.5])
+        std = torch.tensor([0.25, 0.25, 0.25])
+        augs: list[nn.Module] = [kornia_aug.RandomHorizontalFlip(p=0.5), kornia_aug.Normalize(mean=mean, std=std)]
+        pipeline = GPUAugmentationPipeline(augs)
+        assert pipeline.mean is not None
+        assert abs(pipeline.mean[0] - 0.5) < 1e-4
+
+
+class TestGPUAugmentationPipelineListAvailable:
+    """Tests for list_available_transforms."""
+
+    def test_returns_list(self):
+        result = GPUAugmentationPipeline.list_available_transforms()
+        assert isinstance(result, list)
+        assert len(result) > 0
+
+
+class TestGPUAugmentationPipelineDispatchTransform:
+    """Tests for _dispatch_transform."""
+
+    def test_dict_config(self):
+        cfg = {
+            "class_path": "kornia.augmentation.RandomHorizontalFlip",
+            "init_args": {"p": 0.5},
+        }
+        result = GPUAugmentationPipeline._dispatch_transform(cfg)
+        assert isinstance(result, kornia_aug.RandomHorizontalFlip)
+
+    def test_already_instantiated(self):
+        aug = kornia_aug.RandomHorizontalFlip(p=0.5)
+        result = GPUAugmentationPipeline._dispatch_transform(aug)
+        assert result is aug
+
+    def test_unsupported_type_raises(self):
+        with pytest.raises(TypeError, match="GPUAugmentationPipeline accepts only"):
+            GPUAugmentationPipeline._dispatch_transform("bad_value")  # type: ignore[arg-type]
+
+
+class TestGPUAugmentationPipelineFromConfig:
+    """Tests for from_config."""
+
+    def test_empty_config(self):
+        config = SubsetConfig(augmentations_gpu=[], input_size=None)
+        pipeline = GPUAugmentationPipeline.from_config(config)
+        assert pipeline.aug_sequential is None
+
+    def test_with_augmentations(self):
+        config = SubsetConfig(
+            augmentations_gpu=[
+                {
+                    "class_path": "kornia.augmentation.RandomHorizontalFlip",
+                    "init_args": {"p": 0.5},
+                },
+            ],
+            input_size=None,
+        )
+        pipeline = GPUAugmentationPipeline.from_config(config)
+        assert pipeline.aug_sequential is not None
+
+    def test_nn_module_passthrough(self):
+        aug = kornia_aug.RandomHorizontalFlip(p=1.0)
+        config = SubsetConfig(augmentations_gpu=[aug], input_size=None)  # type: ignore[arg-type]
+        pipeline = GPUAugmentationPipeline.from_config(config)
+        assert pipeline.aug_sequential is not None
+
+    def test_unsupported_config_type_raises(self):
+        config = SubsetConfig(augmentations_gpu=["bad_value"], input_size=None)  # type: ignore[arg-type]
+        with pytest.raises(TypeError, match="Unsupported augmentation config type"):
+            GPUAugmentationPipeline.from_config(config)
+
+    def test_custom_data_keys(self):
+        config = SubsetConfig(
+            augmentations_gpu=[
+                {
+                    "class_path": "kornia.augmentation.RandomHorizontalFlip",
+                    "init_args": {"p": 0.5},
+                },
+            ],
+            input_size=None,
+        )
+        pipeline = GPUAugmentationPipeline.from_config(config, data_keys=["input", "bbox_xyxy"])
+        assert pipeline.data_keys == ["input", "bbox_xyxy"]
+
+
+class TestGPUAugmentationPipelineForward:
+    """Tests for the GPUAugmentationPipeline forward pass and its repr."""
+
+    def test_empty_pipeline_passthrough(self):
+        pipeline = GPUAugmentationPipeline()
+        images = _make_batched_images(2)
+        result = pipeline(images)
+        assert result["images"] is images
+        assert result["labels"] is None
+        assert result["bboxes"] is None
+        assert result["masks"] is None
+
+    def test_image_only_augmentation(self):
+        """Single-key augmentation modifying only images."""
+        pipeline = GPUAugmentationPipeline(
+            [kornia_aug.RandomHorizontalFlip(p=1.0)],
+            data_keys=["input"],
+        )
+        images = _make_batched_images(2, h=32, w=32)
+        result = pipeline(images)
+        assert result["images"].shape == images.shape
+        # p=1.0 means always flip → output must be the input mirrored along the width axis
+        assert torch.allclose(result["images"], images.flip(-1))
+
+    def test_normalization(self):
+        """Normalize transform changes pixel range."""
+        mean = torch.tensor([0.5, 0.5, 0.5])
+        std = torch.tensor([0.25, 0.25, 0.25])
+        pipeline = GPUAugmentationPipeline(
+            [kornia_aug.Normalize(mean=mean, std=std)],
+            data_keys=["input"],
+        )
+        images = torch.full((2, 3, 8, 8), 0.5)  # All pixels = 0.5
+        result = pipeline(images)
+        # (0.5 - 0.5) / 0.25 = 0.0
+        assert torch.allclose(result["images"], torch.zeros_like(images), atol=1e-5)
+
+    def test_forward_with_masks(self):
+        """Forward with mask data key."""
+        pipeline = GPUAugmentationPipeline(
+            [kornia_aug.RandomHorizontalFlip(p=1.0)],
+            data_keys=["input", "mask"],
+        )
+        images = _make_batched_images(2, h=16, w=16)
+        masks = [torch.randint(0, 2, (1, 16, 16), dtype=torch.float32) for _ in range(2)]
+        result = pipeline(images, masks=masks)
+        assert result["images"] is not None
+        assert result["masks"] is not None
+
+    def test_forward_preserves_batch_size(self):
+        pipeline = GPUAugmentationPipeline(
+            [kornia_aug.RandomHorizontalFlip(p=0.5)],
+            data_keys=["input"],
+        )
+        images = _make_batched_images(4, h=16, w=16)
+        result = pipeline(images)
+        assert result["images"].shape[0] == 4
+
+    def test_repr_empty(self):
+        pipeline = GPUAugmentationPipeline()
+        r = repr(pipeline)
+        assert "GPUAugmentationPipeline" in r
+        assert "empty" in r
+
+    def test_repr_with_augs(self):
+        pipeline = GPUAugmentationPipeline([kornia_aug.RandomHorizontalFlip(p=0.5)])
+        r = repr(pipeline)
+        assert "GPUAugmentationPipeline" in r
+
+    def test_repr_with_normalization(self):
+        pipeline = GPUAugmentationPipeline(
+            [kornia_aug.Normalize(mean=torch.tensor([0.5, 0.5, 0.5]), std=torch.tensor([0.5, 0.5, 0.5]))]
+        )
+        r = repr(pipeline)
+        assert "mean=" in r
+        assert "std=" in r
+
+
+# ===================================================================
+# CPU/GPU Hybrid Integration Tests
+# ===================================================================
+
+
+class TestHybridCPUGPUPipeline:
+    """Integration tests for the CPUAugmentationPipeline → batch → GPUAugmentationPipeline flow."""
+
+    def test_cpu_to_gpu_uint8_flow(self):
+        """uint8 input: CPU stage yields float32 in [0, 1]; the GPU flip keeps shape and dtype."""
+        cpu_config = SubsetConfig(
+            augmentations_cpu=[],
+            intensity=IntensityConfig(storage_dtype="uint8", mode="scale_to_unit"),
+            input_size=None,
+        )
+        gpu_config = SubsetConfig(
+            augmentations_gpu=[
+                {
+                    "class_path": "kornia.augmentation.RandomHorizontalFlip",
+                    "init_args": {"p": 1.0},
+                },
+            ],
+            input_size=None,
+        )
+
+        cpu_pipeline = CPUAugmentationPipeline.from_config(cpu_config)
+        gpu_pipeline = GPUAugmentationPipeline.from_config(gpu_config)
+
+        # Per-sample CPU stage: each uint8 sample goes through the CPU pipeline on its own
+        samples = []
+        for _ in range(4):
+            sample = _make_sample(32, 32, dtype=torch.uint8)
+            result = cpu_pipeline(sample)
+            samples.append(result)
+
+        # Collate: stack the per-sample images into one batch tensor
+        batch_images = torch.stack([s.image for s in samples])
+        assert batch_images.dtype == torch.float32
+        assert batch_images.min() >= 0.0
+        assert batch_images.max() <= 1.0
+
+        # GPU stage: batch-level augmentation
+        gpu_result = gpu_pipeline(batch_images)
+        assert gpu_result["images"].shape == batch_images.shape
+        assert gpu_result["images"].dtype == torch.float32
+
+    def test_cpu_to_gpu_uint16_flow(self):
+        """uint16 "range_scale" intensity: CPU yields 3-channel float32 in [0, 1]; GPU Normalize keeps shape."""
+        cpu_config = SubsetConfig(
+            augmentations_cpu=[],
+            intensity=IntensityConfig(
+                storage_dtype="uint16",
+                mode="range_scale",
+                scale_factor=0.4,
+                min_value=295.15,
+                max_value=360.15,
+                repeat_channels=3,
+            ),
+            input_size=None,
+        )
+        gpu_config = SubsetConfig(
+            augmentations_gpu=[
+                {
+                    "class_path": "kornia.augmentation.Normalize",
+                    "init_args": {
+                        "mean": [0.5, 0.5, 0.5],
+                        "std": [0.25, 0.25, 0.25],
+                    },
+                },
+            ],
+            input_size=None,
+        )
+
+        cpu_pipeline = CPUAugmentationPipeline.from_config(cpu_config)
+        gpu_pipeline = GPUAugmentationPipeline.from_config(gpu_config)
+
+        # Simulated thermal sensor counts in [738, 900]; stored as int32 although the values fit in uint16
+        raw_data = torch.randint(738, 901, (1, 32, 32), dtype=torch.int32)
+
+        sample = _SimpleSample(image=raw_data)
+        result = cpu_pipeline(sample)
+
+        # After intensity mapping: float32, 3-channel, [0, 1]
+        assert result.image.dtype == torch.float32
+        assert result.image.shape[0] == 3  # repeat_channels=3
+        assert result.image.min() >= 0.0
+        assert result.image.max() <= 1.0
+
+        # Add a leading batch axis of size 1
+        batch_images = result.image.unsqueeze(0)  # (1, 3, 32, 32)
+        gpu_result = gpu_pipeline(batch_images)
+        assert gpu_result["images"].shape == (1, 3, 32, 32)
+        assert gpu_result["images"].dtype == torch.float32
+
+    def test_cpu_resize_gpu_normalize(self):
+        """CPU Resize to 64x64, then GPU Normalize — the output must leave the [0, 1] range."""
+        cpu_config = SubsetConfig(
+            augmentations_cpu=[
+                {
+                    "class_path": "torchvision.transforms.v2.Resize",
+                    "init_args": {"size": [64, 64]},
+                },
+            ],
+            intensity=IntensityConfig(storage_dtype="uint8", mode="scale_to_unit"),
+            input_size=None,
+        )
+        gpu_config = SubsetConfig(
+            augmentations_gpu=[
+                {
+                    "class_path": "kornia.augmentation.Normalize",
+                    "init_args": {
+                        "mean": [0.485, 0.456, 0.406],
+                        "std": [0.229, 0.224, 0.225],
+                    },
+                },
+            ],
+            input_size=None,
+        )
+
+        cpu_pipeline = CPUAugmentationPipeline.from_config(cpu_config)
+        gpu_pipeline = GPUAugmentationPipeline.from_config(gpu_config)
+
+        # Per-sample CPU processing
+        sample = _make_sample(128, 128, dtype=torch.uint8)
+        result = cpu_pipeline(sample)
+        assert result.image.shape == (3, 64, 64)  # Resized from 128x128
+        assert result.image.dtype == torch.float32
+
+        # Add a batch axis, then run the GPU stage
+        batch_images = result.image.unsqueeze(0)
+        gpu_result = gpu_pipeline(batch_images)
+        assert gpu_result["images"].shape == (1, 3, 64, 64)
+        # After normalization at least one end of the value range should fall outside [0, 1]
+        assert gpu_result["images"].min() < 0.0 or gpu_result["images"].max() > 1.0
+
+    def test_cpu_empty_gpu_only_flow(self):
+        """GPU-only flow: a p=1.0 horizontal flip keeps the shape but changes the pixels."""
+        gpu_config = SubsetConfig(
+            augmentations_gpu=[
+                {
+                    "class_path": "kornia.augmentation.RandomHorizontalFlip",
+                    "init_args": {"p": 1.0},
+                },
+            ],
+            input_size=None,
+        )
+        gpu_pipeline = GPUAugmentationPipeline.from_config(gpu_config)
+
+        # Build the batch directly, skipping the CPU stage
+        images = _make_batched_images(2, h=16, w=16)
+        result = gpu_pipeline(images)
+        assert result["images"].shape == images.shape
+        # p=1.0 flips every image; assumes the helper's images are not left-right symmetric — TODO confirm
+        assert not torch.allclose(result["images"], images)
+
+    def test_gpu_normalization_params_for_model(self):
+        """A from_config pipeline exposes .mean/.std matching the Normalize init_args (tolerance 1e-4)."""
+        mean = [0.485, 0.456, 0.406]
+        std = [0.229, 0.224, 0.225]
+        gpu_config = SubsetConfig(
+            augmentations_gpu=[
+                {
+                    "class_path": "kornia.augmentation.Normalize",
+                    "init_args": {"mean": mean, "std": std},
+                },
+            ],
+            input_size=None,
+        )
+        gpu_pipeline = GPUAugmentationPipeline.from_config(gpu_config)
+        assert gpu_pipeline.mean is not None
+        assert gpu_pipeline.std is not None
+        for i in range(3):
+            assert abs(gpu_pipeline.mean[i] - mean[i]) < 1e-4
+            assert abs(gpu_pipeline.std[i] - std[i]) < 1e-4
+
+    def test_full_train_pipeline_simulation(self):
+        """Full train-step simulation: per-sample CPU crop → torch.stack → batched GPU flip + normalize."""
+        # Build pipelines
+        cpu_config = SubsetConfig(
+            augmentations_cpu=[
+                {
+                    "class_path": "torchvision.transforms.v2.RandomResizedCrop",
+                    "init_args": {"size": [32, 32]},
+                },
+            ],
+            intensity=IntensityConfig(storage_dtype="uint8", mode="scale_to_unit"),
+            input_size=None,
+        )
+        gpu_config = SubsetConfig(
+            augmentations_gpu=[
+                {
+                    "class_path": "kornia.augmentation.RandomHorizontalFlip",
+                    "init_args": {"p": 0.5},
+                },
+                {
+                    "class_path": "kornia.augmentation.Normalize",
+                    "init_args": {
+                        "mean": [0.485, 0.456, 0.406],
+                        "std": [0.229, 0.224, 0.225],
+                    },
+                },
+            ],
+            input_size=None,
+        )
+
+        cpu_pipeline = CPUAugmentationPipeline.from_config(cpu_config)
+        gpu_pipeline = GPUAugmentationPipeline.from_config(gpu_config)
+
+        # CPU: per-sample processing (simulates Dataset.__getitem__)
+        batch_images = []
+        for _ in range(4):
+            sample = _make_sample(64, 64, dtype=torch.uint8)
+            result = cpu_pipeline(sample)
+            assert result.image.shape == (3, 32, 32)
+            assert result.image.dtype == torch.float32
+            batch_images.append(result.image)
+
+        # Collate: stack into batch
+        batch = torch.stack(batch_images)
+        assert batch.shape == (4, 3, 32, 32)
+
+        # GPU: batch-level augmentation
+        gpu_result = gpu_pipeline(batch)
+        assert gpu_result["images"].shape == (4, 3, 32, 32)
+        assert gpu_result["images"].dtype == torch.float32
+
+
+# ===================================================================
+# GPUAugmentationCallback tests
+# ===================================================================
+
+
+class TestGPUAugmentationCallback:
+    """Tests for the Lightning Callback that orchestrates GPU augmentations."""
+
+    def _make_callback(self, train_augs=None, val_augs=None, test_augs=None):  # noqa: ANN202
+        """Build a callback: train/val always get a SubsetConfig, test only when ``test_augs`` is truthy."""
+        from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
+
+        train_config = SubsetConfig(augmentations_gpu=train_augs or [])
+        val_config = SubsetConfig(augmentations_gpu=val_augs or [])
+        test_config = SubsetConfig(augmentations_gpu=test_augs) if test_augs else None
+        return GPUAugmentationCallback(
+            train_config=train_config,
+            val_config=val_config,
+            test_config=test_config,
+        )
+
+    def test_init_defaults(self):
+        from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
+
+        callback = GPUAugmentationCallback()
+        assert callback.train_config is None
+        assert callback.val_config is None
+        assert callback.test_config is None
+        assert callback._train_pipeline is None
+        assert callback._val_pipeline is None
+        assert callback._test_pipeline is None
+
+    def test_setup_creates_pipelines(self):
+        """setup() should create train and val pipelines."""
+        from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
+        from otx.types.task import OTXTaskType
+
+        train_config = SubsetConfig(
+            augmentations_gpu=[
+                {"class_path": "kornia.augmentation.RandomHorizontalFlip", "init_args": {"p": 0.5}},
+            ],
+        )
+        val_config = SubsetConfig(
+            augmentations_gpu=[
+                {
+                    "class_path": "kornia.augmentation.Normalize",
+                    "init_args": {"mean": [0.5, 0.5, 0.5], "std": [0.5, 0.5, 0.5]},
+                },
+            ],
+        )
+
+        callback = GPUAugmentationCallback(train_config=train_config, val_config=val_config)
+
+        # Create mock module with required attributes
+        pl_module = MagicMock()
+        pl_module.task = OTXTaskType.DETECTION
+        pl_module.data_input_params = MagicMock()
+        pl_module.data_input_params.mean = None
+        pl_module.data_input_params.std = None
+
+        trainer = MagicMock()
+        callback.setup(trainer, pl_module, stage="fit")
+
+        assert callback._train_pipeline is not None
+        assert callback._val_pipeline is not None
+
+    def test_setup_updates_model_normalization(self):
+        """setup() should update model's mean/std from GPU pipeline."""
+        from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
+        from otx.types.task import OTXTaskType
+
+        val_config = SubsetConfig(
+            augmentations_gpu=[
+                {
+                    "class_path": "kornia.augmentation.Normalize",
+                    "init_args": {
+                        "mean": [0.485, 0.456, 0.406],
+                        "std": [0.229, 0.224, 0.225],
+                    },
+                },
+            ],
+        )
+
+        callback = GPUAugmentationCallback(val_config=val_config)
+
+        pl_module = MagicMock()
+        pl_module.task = OTXTaskType.MULTI_CLASS_CLS
+        pl_module.data_input_params = MagicMock()
+        pl_module.data_input_params.mean = None
+        pl_module.data_input_params.std = None
+
+        trainer = MagicMock()
+        callback.setup(trainer, pl_module, stage="fit")
+
+        # Both were explicitly set to None above, so non-None here means setup() wrote them
+        assert pl_module.data_input_params.mean is not None
+        assert pl_module.data_input_params.std is not None
+
+    def test_on_train_batch_start_no_pipeline(self):
+        """If no train pipeline, on_train_batch_start should be a no-op."""
+        from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
+
+        callback = GPUAugmentationCallback()
+        batch = MagicMock()
+        # Should not raise
+        callback.on_train_batch_start(MagicMock(), MagicMock(), batch, batch_idx=0)
+
+    def test_on_val_batch_start_disabled(self):
+        """If no val pipeline, validation batches should not be augmented."""
+        from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
+
+        callback = GPUAugmentationCallback()
+        # _val_pipeline is None by default
+        assert callback._val_pipeline is None
+        batch = MagicMock()
+        callback.on_validation_batch_start(MagicMock(), MagicMock(), batch, batch_idx=0)
+        # Should still be None (no pipeline was created)
+        assert callback._val_pipeline is None
+
+    def test_on_test_batch_start_disabled(self):
+        """If no test pipeline, test batches should not be augmented."""
+        from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
+
+        callback = GPUAugmentationCallback()
+        assert callback._test_pipeline is None  # None by default
+        batch = MagicMock()
+        callback.on_test_batch_start(MagicMock(), MagicMock(), batch, batch_idx=0)
+        assert callback._test_pipeline is None  # the hook must not create a pipeline
+
+    def test_test_config_fallback_to_val(self):
+        """If test_config is None, it should fall back to val_config."""
+        from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
+
+        val_config = SubsetConfig(augmentations_gpu=[])
+        callback = GPUAugmentationCallback(val_config=val_config, test_config=None)
+        assert callback.test_config is val_config
+
+    def test_data_keys_per_task(self):
+        """Verify correct data_keys are used for different task types."""
+        from otx.backend.native.callbacks.gpu_augmentation import GPUAugmentationCallback
+        from otx.types.task import OTXTaskType
+
+        expected_keys = {
+            OTXTaskType.DETECTION: ["input", "bbox_xyxy", "label"],
+            OTXTaskType.INSTANCE_SEGMENTATION: ["input", "bbox_xyxy", "mask", "label"],
+            OTXTaskType.SEMANTIC_SEGMENTATION: ["input", "mask"],
+            OTXTaskType.MULTI_CLASS_CLS: ["input", "label"],
+        }
+
+        for task_type, expected in expected_keys.items():
+            data_keys = ["input", *GPUAugmentationCallback._DATA_KEYS_BY_TASK.get(task_type, [])]
+            assert data_keys == expected, f"Mismatch for {task_type}: {data_keys} != {expected}"
diff --git a/library/tests/unit/data/augmentation/test_pipeline_internals.py b/library/tests/unit/data/augmentation/test_pipeline_internals.py
new file mode 100644
index 00000000000..de1c606ffc1
--- /dev/null
+++ b/library/tests/unit/data/augmentation/test_pipeline_internals.py
@@ -0,0 +1,290 @@
+# Copyright (C) 2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+"""Unit tests for _DTYPE_TO_BIT_DEPTH, _IntensityAdapter, _sanitize_annotations, and GPU pipeline edge cases."""
+
+from __future__ import annotations
+
+import pytest
+import torch
+from torch import nn
+
+from otx.data.augmentation.pipeline import (
+    _DTYPE_TO_BIT_DEPTH,
+    GPUAugmentationPipeline,
+    _IntensityAdapter,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+class _IdentityTransform(nn.Module):
+    """No-op transform for testing _IntensityAdapter wrapping: ``forward`` returns its input as-is."""
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return x
+
+
+class _ScaleTransform(nn.Module):
+    """Halving transform (``x * 0.5``) so a test can observe that the wrapped transform actually ran."""
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return x * 0.5
+
+
+class _SimpleSample:
+    """Minimal duck-type of OTXSample for testing: stores only ``image`` and an optional ``img_info``."""
+
+    def __init__(self, image: torch.Tensor, img_info: object | None = None) -> None:
+        self.image = image
+        self.img_info = img_info
+
+
+class _SimpleImgInfo:
+    """Minimal duck-type of ImageInfo for testing: a single ``bit_depth`` attribute that starts as None."""
+
+    def __init__(self) -> None:
+        self.bit_depth: int | None = None
+
+
+# =====================================================================
+# _DTYPE_TO_BIT_DEPTH mapping
+# =====================================================================
+class TestDtypeToBitDepth:  # pins individual entries of the _DTYPE_TO_BIT_DEPTH lookup table
+    def test_uint8(self):
+        assert _DTYPE_TO_BIT_DEPTH["uint8"] == 8
+
+    def test_uint16(self):
+        assert _DTYPE_TO_BIT_DEPTH["uint16"] == 16
+
+    def test_int16(self):
+        assert _DTYPE_TO_BIT_DEPTH["int16"] == 16  # signed 16-bit maps to the same depth as uint16
+
+    def test_float32(self):
+        assert _DTYPE_TO_BIT_DEPTH["float32"] == 32
+
+
+# =====================================================================
+# _IntensityAdapter Tests
+# =====================================================================
+class TestIntensityAdapter:
+    """Checks that _IntensityAdapter stamps bit depth on img_info and delegates to the wrapped transform."""
+
+    def test_stamps_bit_depth_uint8(self):
+        wrapper = _IntensityAdapter(_IdentityTransform(), storage_dtype="uint8")
+        info = _SimpleImgInfo()
+        item = _SimpleSample(image=torch.rand(3, 8, 8), img_info=info)
+        out = wrapper(item)  # type: ignore[arg-type]
+        assert out.img_info.bit_depth == 8
+
+    def test_stamps_bit_depth_uint16(self):
+        wrapper = _IntensityAdapter(_IdentityTransform(), storage_dtype="uint16")
+        info = _SimpleImgInfo()
+        item = _SimpleSample(image=torch.rand(3, 8, 8), img_info=info)
+        wrapper(item)  # type: ignore[arg-type]
+        assert info.bit_depth == 16
+
+    def test_stamps_bit_depth_float32(self):
+        wrapper = _IntensityAdapter(_IdentityTransform(), storage_dtype="float32")
+        info = _SimpleImgInfo()
+        item = _SimpleSample(image=torch.rand(3, 8, 8), img_info=info)
+        wrapper(item)  # type: ignore[arg-type]
+        assert info.bit_depth == 32
+
+    def test_unknown_dtype_defaults_to_8(self):
+        wrapper = _IntensityAdapter(_IdentityTransform(), storage_dtype="bfloat16")
+        assert wrapper.bit_depth == 8
+
+    def test_applies_inner_transform(self):
+        wrapper = _IntensityAdapter(_ScaleTransform(), storage_dtype="uint8")
+        ones = torch.ones(3, 4, 4)
+        item = _SimpleSample(image=ones, img_info=_SimpleImgInfo())
+        out = wrapper(item)  # type: ignore[arg-type]
+        assert torch.allclose(out.image, torch.full_like(ones, 0.5))
+
+    def test_no_img_info_does_not_crash(self):
+        """A sample whose img_info is None goes through the adapter without raising."""
+        wrapper = _IntensityAdapter(_IdentityTransform(), storage_dtype="uint16")
+        item = _SimpleSample(image=torch.rand(3, 4, 4), img_info=None)
+        out = wrapper(item)  # type: ignore[arg-type]
+        assert out.img_info is None  # no crash
+
+    def test_is_nn_module(self):
+        wrapper = _IntensityAdapter(_IdentityTransform())
+        assert isinstance(wrapper, nn.Module)
+
+    def test_inner_transform_accessible(self):
+        wrapped = _IdentityTransform()
+        wrapper = _IntensityAdapter(wrapped, storage_dtype="uint8")
+        assert wrapper.transform is wrapped
+
+
+# =====================================================================
+# _sanitize_annotations Tests
+# =====================================================================
+class TestSanitizeAnnotations:
+    """Tests for GPUAugmentationPipeline._sanitize_annotations."""
+
+    @pytest.fixture
+    def pipeline(self) -> GPUAugmentationPipeline:
+        return GPUAugmentationPipeline()
+
+    def test_bboxes_none_returns_all_none(self, pipeline: GPUAugmentationPipeline):
+        """When bboxes=None, all four annotation outputs come back as None."""
+        images = torch.rand(2, 3, 32, 32)
+        b, lab, m, k = pipeline._sanitize_annotations(
+            images, bboxes=None, labels=None, masks=None, keypoints=None
+        )
+        # Check every returned slot, not only bboxes/labels.
+        assert b is None
+        assert lab is None
+        assert m is None
+        assert k is None
+
+    def test_clips_bboxes_to_image_bounds(self, pipeline: GPUAugmentationPipeline):
+        """Bboxes extending outside image should be clamped."""
+        images = torch.rand(1, 3, 64, 64)
+        bboxes = [torch.tensor([[-10.0, -5.0, 50.0, 50.0], [20.0, 20.0, 80.0, 80.0]])]
+        labels = [torch.tensor([0, 1])]
+        out_b, _, _, _ = pipeline._sanitize_annotations(images, bboxes, labels, None, None)
+        assert out_b is not None
+        # The inputs stick out on both sides: negative x1/y1 and x2/y2 beyond 64.
+        # All surviving bbox coordinates should be within the image bounds.
+        for b in out_b:
+            if b.numel() > 0:
+                assert (b[:, 0] >= 0).all()
+                assert (b[:, 1] >= 0).all()
+                assert (b[:, 2] <= 64).all()
+                assert (b[:, 3] <= 64).all()
+
+    def test_removes_tiny_bboxes(self, pipeline: GPUAugmentationPipeline):
+        """Bboxes with width/height below min_size should be filtered out."""
+        images = torch.rand(1, 3, 64, 64)
+        bboxes = [torch.tensor([[10.0, 10.0, 50.0, 50.0], [10.0, 10.0, 12.0, 12.0]])]
+        labels = [torch.tensor([0, 1])]
+        out_b, out_l, _, _ = pipeline._sanitize_annotations(
+            images, bboxes, labels, None, None, min_size=4.0, min_area=16.0
+        )
+        # Second bbox has width=2, height=2 → filtered out
+        assert out_b is not None
+        assert len(out_b[0]) == 1
+        assert out_l is not None
+        assert len(out_l[0]) == 1
+
+    def test_labels_filtered_in_lockstep(self, pipeline: GPUAugmentationPipeline):
+        """When bboxes are removed, corresponding labels should also be removed."""
+        images = torch.rand(1, 3, 64, 64)
+        bboxes = [torch.tensor([[10.0, 10.0, 50.0, 50.0], [10.0, 10.0, 11.0, 11.0]])]
+        labels = [torch.tensor([42, 99])]
+        _, out_l, _, _ = pipeline._sanitize_annotations(images, bboxes, labels, None, None)
+        assert out_l is not None
+        assert out_l[0].tolist() == [42]
+
+    def test_masks_filtered_with_bboxes(self, pipeline: GPUAugmentationPipeline):
+        """Instance masks should be filtered by the same valid mask."""
+        images = torch.rand(1, 3, 64, 64)
+        bboxes = [torch.tensor([[10.0, 10.0, 50.0, 50.0], [10.0, 10.0, 11.0, 11.0]])]
+        labels = [torch.tensor([0, 1])]
+        masks = [torch.rand(2, 64, 64)]  # 2 instance masks
+        _, _, out_m, _ = pipeline._sanitize_annotations(images, bboxes, labels, masks, None)
+        assert out_m is not None
+        assert out_m[0].shape[0] == 1  # only first mask kept
+
+    def test_empty_bboxes(self, pipeline: GPUAugmentationPipeline):
+        """Empty bboxes should not crash."""
+        images = torch.rand(1, 3, 32, 32)
+        bboxes = [torch.zeros(0, 4)]
+        labels = [torch.zeros(0, dtype=torch.long)]
+        out_b, _, _, _ = pipeline._sanitize_annotations(images, bboxes, labels, None, None)
+        assert out_b is not None
+        assert len(out_b[0]) == 0
+
+    def test_batch_mismatch_raises(self, pipeline: GPUAugmentationPipeline):
+        images = torch.rand(2, 3, 32, 32)
+        bboxes = [torch.tensor([[10.0, 10.0, 50.0, 50.0]])]  # only 1 element for a batch of 2
+        with pytest.raises(RuntimeError, match="bboxes batch mismatch"):
+            pipeline._sanitize_annotations(images, bboxes, None, None, None)
+
+    def test_labels_mismatch_raises(self, pipeline: GPUAugmentationPipeline):
+        images = torch.rand(2, 3, 32, 32)
+        bboxes = [torch.tensor([[10.0, 10.0, 50.0, 50.0]]), torch.tensor([[10.0, 10.0, 50.0, 50.0]])]
+        labels = [torch.tensor([0])]  # only 1 element for a batch of 2
+        with pytest.raises(RuntimeError, match="labels batch mismatch"):
+            pipeline._sanitize_annotations(images, bboxes, labels, None, None)
+
+    def test_bad_bbox_shape_raises(self, pipeline: GPUAugmentationPipeline):
+        images = torch.rand(1, 3, 32, 32)
+        bboxes = [torch.tensor([10.0, 10.0, 50.0, 50.0])]  # 1D, not 2D
+        with pytest.raises(RuntimeError, match="must be .N,4."):
+            pipeline._sanitize_annotations(images, bboxes, None, None, None)
+
+    def test_keypoints_clamped(self, pipeline: GPUAugmentationPipeline):
+        """Keypoints should be clamped to image bounds."""
+        images = torch.rand(1, 3, 64, 64)
+        bboxes = [torch.tensor([[10.0, 10.0, 50.0, 50.0]])]
+        keypoints = [torch.tensor([[-5.0, 70.0]])]  # out of bounds
+        _, _, _, out_k = pipeline._sanitize_annotations(images, bboxes, None, None, keypoints)
+        assert out_k is not None
+        assert (out_k[0][..., 0] >= 0).all()
+        assert (out_k[0][..., 1] <= 64).all()
+
+    def test_nonfinite_bboxes_filtered(self, pipeline: GPUAugmentationPipeline):
+        """Non-finite (NaN/Inf) bboxes should be removed."""
+        images = torch.rand(1, 3, 64, 64)
+        bboxes = [torch.tensor([[10.0, 10.0, 50.0, 50.0], [float("nan"), 10.0, 50.0, 50.0]])]
+        labels = [torch.tensor([0, 1])]
+        out_b, _, _, _ = pipeline._sanitize_annotations(images, bboxes, labels, None, None)
+        assert out_b is not None
+        assert len(out_b[0]) == 1
+
+    def test_labels_size_mismatch_raises(self, pipeline: GPUAugmentationPipeline):
+        """Labels count not matching bboxes count should raise."""
+        images = torch.rand(1, 3, 64, 64)
+        bboxes = [torch.tensor([[10.0, 10.0, 50.0, 50.0]])]
+        labels = [torch.tensor([0, 1, 2])]  # 3 labels vs 1 bbox
+        with pytest.raises(RuntimeError, match="labels.*size mismatch"):
+            pipeline._sanitize_annotations(images, bboxes, labels, None, None)
+
+    def test_semantic_mask_passthrough(self, pipeline: GPUAugmentationPipeline):
+        """Semantic masks (shape doesn't match N objects) are passed through unchanged."""
+        images = torch.rand(1, 3, 64, 64)
+        bboxes = [torch.tensor([[10.0, 10.0, 50.0, 50.0]])]
+        labels = [torch.tensor([0])]
+        # Semantic mask: 3D but first dim != n_bboxes
+        masks = [torch.rand(5, 64, 64)]  # 5 != 1
+        _, _, out_m, _ = pipeline._sanitize_annotations(images, bboxes, labels, masks, None)
+        assert out_m is not None
+        # Semantic mask passed through unfiltered
+        assert out_m[0].shape[0] == 5
+
+
+# =====================================================================
+# GPU Pipeline - Kornia single-key normalisation fix
+# =====================================================================
+class TestGPUPipelineSingleKeyNormalise:
+    """Output-shape regressions for pipelines built with one versus several Kornia data keys."""
+
+    def test_single_key_preserves_batch_dim(self):
+        """With only the "input" key, the leading batch axis must survive."""
+        import kornia.augmentation as kaug
+
+        expected_shape = (3, 3, 16, 16)
+        identity_flip = kaug.RandomHorizontalFlip(p=0.0)  # p=0 → identity
+        single_key = GPUAugmentationPipeline([identity_flip], data_keys=["input"])
+        batch = torch.rand(*expected_shape)
+        output = single_key(batch)
+        out_images = output["images"]
+        assert out_images.shape == expected_shape
+
+    def test_multi_key_preserves_batch_dim(self):
+        """With "input" and "mask" keys, the image batch shape must be unchanged too."""
+        import kornia.augmentation as kaug
+
+        batch_size = 2
+        identity_flip = kaug.RandomHorizontalFlip(p=0.0)
+        two_keys = GPUAugmentationPipeline([identity_flip], data_keys=["input", "mask"])
+        batch = torch.rand(batch_size, 3, 16, 16)
+        per_image_masks = [torch.randint(0, 2, (1, 16, 16), dtype=torch.float32) for _ in range(batch_size)]
+        output = two_keys(batch, masks=per_image_masks)
+        out_images = output["images"]
+        assert out_images.shape == (batch_size, 3, 16, 16)
diff --git a/library/tests/unit/data/augmentation/test_resize.py b/library/tests/unit/data/augmentation/test_resize.py
new file mode 100644
index 00000000000..1902948c6c5
--- /dev/null
+++ b/library/tests/unit/data/augmentation/test_resize.py
@@ -0,0 +1,357 @@
+# Copyright (C) 2024-2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+"""Unit tests for Resize transform with aspect ratio preservation."""
+
+from __future__ import annotations
+
+from copy import deepcopy
+
+import pytest
+import torch
+from datumaro.experimental.fields import ImageInfo as DmImageInfo
+from torch import LongTensor
+from torchvision import tv_tensors
+
+from otx.data.augmentation.transforms import Resize
+from otx.data.entity.sample import (
+    DetectionSample,
+    InstanceSegmentationSample,
+)
+
+
+class TestResize:
+    """Test cases for Resize transform."""
+
+    @pytest.fixture
+    def square_image_entity(self) -> InstanceSegmentationSample:
+        """Create a square image sample with bboxes and masks."""
+        img_size = (100, 100)
+        fake_image = torch.randint(low=0, high=256, size=(3, *img_size), dtype=torch.uint8)
+        fake_bboxes = torch.tensor([[10, 10, 50, 50], [60, 20, 90, 80]], dtype=torch.float32)
+
+        # Create masks that correspond to bboxes
+        masks = torch.zeros(size=(2, *img_size), dtype=torch.uint8)
+        masks[0, 10:50, 10:50] = 1
+        masks[1, 20:80, 60:90] = 1
+
+        return InstanceSegmentationSample(
+            image=tv_tensors.Image(fake_image),
+            dm_image_info=DmImageInfo(height=img_size[0], width=img_size[1]),
+            bboxes=tv_tensors.BoundingBoxes(  # type: ignore[call-overload]
+                fake_bboxes, format=tv_tensors.BoundingBoxFormat.XYXY, canvas_size=img_size
+            ),
+            label=LongTensor([0, 1]),
+            masks=tv_tensors.Mask(masks),
+        )
+
+    @pytest.fixture
+    def wide_image_entity(self) -> InstanceSegmentationSample:
+        """Create a wide (landscape) image sample with bboxes and masks."""
+        img_size = (100, 200)  # height, width
+        fake_image = torch.randint(low=0, high=256, size=(3, *img_size), dtype=torch.uint8)
+        fake_bboxes = torch.tensor([[10, 10, 50, 50], [120, 20, 180, 80]], dtype=torch.float32)
+
+        masks = torch.zeros(size=(2, *img_size), dtype=torch.uint8)
+        masks[0, 10:50, 10:50] = 1
+        masks[1, 20:80, 120:180] = 1
+
+        return InstanceSegmentationSample(
+            image=tv_tensors.Image(fake_image),
+            dm_image_info=DmImageInfo(height=img_size[0], width=img_size[1]),
+            bboxes=tv_tensors.BoundingBoxes(  # type: ignore[call-overload]
+                fake_bboxes, format=tv_tensors.BoundingBoxFormat.XYXY, canvas_size=img_size
+            ),
+            label=LongTensor([0, 1]),
+            masks=tv_tensors.Mask(masks),
+        )
+
+    @pytest.fixture
+    def tall_image_entity(self) -> InstanceSegmentationSample:
+        """Create a tall (portrait) image sample with bboxes and masks."""
+        img_size = (200, 100)  # height, width
+        fake_image = torch.randint(low=0, high=256, size=(3, *img_size), dtype=torch.uint8)
+        fake_bboxes = torch.tensor([[10, 10, 50, 50], [60, 20, 90, 180]], dtype=torch.float32)
+
+        masks = torch.zeros(size=(2, *img_size), dtype=torch.uint8)
+        masks[0, 10:50, 10:50] = 1
+        masks[1, 20:180, 60:90] = 1
+
+        return InstanceSegmentationSample(
+            image=tv_tensors.Image(fake_image),
+            dm_image_info=DmImageInfo(height=img_size[0], width=img_size[1]),
+            bboxes=tv_tensors.BoundingBoxes(  # type: ignore[call-overload]
+                fake_bboxes, format=tv_tensors.BoundingBoxFormat.XYXY, canvas_size=img_size
+            ),
+            label=LongTensor([0, 1]),
+            masks=tv_tensors.Mask(masks),
+        )
+
+    # ==================== Standard Resize Tests ====================
+
+    def test_resize_square_to_square(self, square_image_entity: InstanceSegmentationSample) -> None:
+        """Test resizing square image to square target without aspect ratio preservation."""
+        resize = Resize(size=(64, 64), resize_targets=True, keep_aspect_ratio=False)
+        entity = deepcopy(square_image_entity)
+        orig_bboxes = entity.bboxes.clone()
+        orig_h, orig_w = entity.image.shape[-2:]
+
+        result = resize(entity)
+
+        assert result.image.shape[-2:] == (64, 64)
+        assert result.masks.shape[-2:] == (64, 64)
+        # Bboxes should be scaled proportionally
+        scale_x = 64 / orig_w
+        scale_y = 64 / orig_h
+        expected_bboxes = orig_bboxes.clone()
+        expected_bboxes[:, 0::2] = orig_bboxes[:, 0::2] * scale_x
+        expected_bboxes[:, 1::2] = orig_bboxes[:, 1::2] * scale_y
+        assert torch.allclose(result.bboxes.float(), expected_bboxes.float(), atol=1.0)
+
+    def test_resize_wide_to_square(self, wide_image_entity: InstanceSegmentationSample) -> None:
+        """Test resizing wide image to square target without aspect ratio preservation."""
+        resize = Resize(size=(64, 64), resize_targets=True, keep_aspect_ratio=False)
+        entity = deepcopy(wide_image_entity)
+
+        result = resize(entity)
+
+        assert result.image.shape[-2:] == (64, 64)
+        assert result.masks.shape[-2:] == (64, 64)
+
+    def test_resize_targets_false(self, square_image_entity: InstanceSegmentationSample) -> None:
+        """Test that resize_targets=False only resizes image."""
+        resize = Resize(size=(64, 64), resize_targets=False, keep_aspect_ratio=False)
+        entity = deepcopy(square_image_entity)
+        original_bboxes = entity.bboxes.clone()
+        original_masks_shape = entity.masks.shape[-2:]
+
+        result = resize(entity)
+
+        assert result.image.shape[-2:] == (64, 64)
+        # Bboxes and masks should be unchanged
+        assert torch.equal(result.bboxes, original_bboxes)
+        assert result.masks.shape[-2:] == original_masks_shape
+
+    # ==================== Aspect Ratio Preservation Tests ====================
+
+    def test_resize_with_aspect_ratio_square_to_square(self, square_image_entity: InstanceSegmentationSample) -> None:
+        """Test aspect ratio resize of square image to square target (no padding needed)."""
+        resize = Resize(size=(64, 64), resize_targets=True, keep_aspect_ratio=True)
+        entity = deepcopy(square_image_entity)
+        orig_bboxes = entity.bboxes.clone()
+        orig_h, orig_w = entity.image.shape[-2:]
+
+        result = resize(entity)
+
+        # Square to square: no padding needed
+        assert result.image.shape[-2:] == (64, 64)
+        assert result.masks.shape[-2:] == (64, 64)
+        # Scale is uniform for square to square
+        scale = min(64 / orig_w, 64 / orig_h)
+        expected_bboxes = orig_bboxes * scale
+        assert torch.allclose(result.bboxes.float(), expected_bboxes.float(), atol=1.0)
+
+    def test_resize_with_aspect_ratio_wide_to_square(self, wide_image_entity: InstanceSegmentationSample) -> None:
+        """Test aspect ratio resize of wide image to square target (vertical padding)."""
+        resize = Resize(size=(128, 128), resize_targets=True, keep_aspect_ratio=True)
+        entity = deepcopy(wide_image_entity)
+        orig_bboxes = entity.bboxes.clone()
+        orig_h, orig_w = entity.image.shape[-2:]  # 100, 200
+
+        result = resize(entity)
+
+        # Output should be exactly target size
+        assert result.image.shape[-2:] == (128, 128)
+        assert result.masks.shape[-2:] == (128, 128)
+
+        # Wide image (200w x 100h) -> scale by min(128/200, 128/100) = 0.64
+        # Resized: 128w x 64h, then pad bottom-right only (pad_bottom=64)
+        scale = min(128 / orig_w, 128 / orig_h)
+        new_h = round(orig_h * scale)  # 64
+        pad_bottom = 128 - new_h  # 64
+
+        # Check that padding info is stored (pad_left, pad_top, pad_right, pad_bottom)
+        assert hasattr(result.img_info, "pad_offset")
+        assert result.img_info.pad_offset[0] == 0  # pad_left
+        assert result.img_info.pad_offset[1] == 0  # pad_top
+        assert result.img_info.pad_offset[3] == pad_bottom  # pad_bottom
+
+        # Verify bboxes are correctly transformed (scale only, no offset since pad is bottom-right)
+        expected_x1 = orig_bboxes[:, 0] * scale
+        expected_y1 = orig_bboxes[:, 1] * scale
+        assert torch.allclose(result.bboxes[:, 0].float(), expected_x1.float(), atol=1.0)
+        assert torch.allclose(result.bboxes[:, 1].float(), expected_y1.float(), atol=1.0)
+
+    def test_resize_with_aspect_ratio_tall_to_square(self, tall_image_entity: InstanceSegmentationSample) -> None:
+        """Test aspect ratio resize of tall image to square target (horizontal padding)."""
+        resize = Resize(size=(128, 128), resize_targets=True, keep_aspect_ratio=True)
+        entity = deepcopy(tall_image_entity)
+        orig_bboxes = entity.bboxes.clone()
+        orig_h, orig_w = entity.image.shape[-2:]  # 200, 100
+
+        result = resize(entity)
+
+        # Output should be exactly target size
+        assert result.image.shape[-2:] == (128, 128)
+        assert result.masks.shape[-2:] == (128, 128)
+
+        # Tall image (100w x 200h) -> scale by min(128/100, 128/200) = 0.64
+        # Resized: 64w x 128h, then pad bottom-right only (pad_right=64)
+        scale = min(128 / orig_w, 128 / orig_h)
+        new_w = round(orig_w * scale)  # 64
+        pad_right = 128 - new_w  # 64
+
+        # Check that padding info is stored (pad_left, pad_top, pad_right, pad_bottom)
+        assert hasattr(result.img_info, "pad_offset")
+        assert result.img_info.pad_offset[0] == 0  # pad_left
+        assert result.img_info.pad_offset[1] == 0  # pad_top
+        assert result.img_info.pad_offset[2] == pad_right  # pad_right
+
+        # Verify bboxes are correctly transformed (scale only, no offset since pad is bottom-right)
+        expected_x1 = orig_bboxes[:, 0] * scale
+        expected_y1 = orig_bboxes[:, 1] * scale
+        assert torch.allclose(result.bboxes[:, 0].float(), expected_x1.float(), atol=1.0)
+        assert torch.allclose(result.bboxes[:, 1].float(), expected_y1.float(), atol=1.0)
+
+    def test_resize_with_aspect_ratio_to_non_square(self, wide_image_entity: InstanceSegmentationSample) -> None:
+        """Test aspect ratio resize to non-square target."""
+        resize = Resize(size=(96, 128), resize_targets=True, keep_aspect_ratio=True)  # h, w
+        entity = deepcopy(wide_image_entity)
+        orig_h, orig_w = entity.image.shape[-2:]  # 100, 200
+
+        result = resize(entity)
+
+        # Output should be exactly target size
+        assert result.image.shape[-2:] == (96, 128)
+        assert result.masks.shape[-2:] == (96, 128)
+
+    def test_resize_pad_value(self, wide_image_entity: InstanceSegmentationSample) -> None:
+        """Test that pad_value is correctly applied."""
+        pad_value = 128
+        resize = Resize(size=(128, 128), keep_aspect_ratio=True, pad_value=pad_value)
+        entity = deepcopy(wide_image_entity)
+
+        result = resize(entity)
+
+        # Padding is bottom-right only, so a wide image is padded at the bottom
+        # (pad_top is always 0). Require real padding so the check cannot be
+        # skipped, then verify the last row carries the fill value.
+        pad_bottom = result.img_info.pad_offset[3]
+        assert pad_bottom > 0
+        bottom_row_mean = result.image[:, -1, :].float().mean()
+        assert abs(bottom_row_mean - pad_value) < 1.0
+
+    def test_resize_masks_binary_preserved(self, square_image_entity: InstanceSegmentationSample) -> None:
+        """Test that mask binary values are preserved after resize."""
+        resize = Resize(size=(64, 64), resize_targets=True, keep_aspect_ratio=True)
+        entity = deepcopy(square_image_entity)
+
+        result = resize(entity)
+
+        # Masks must stay strictly binary: only 0s and 1s, no interpolated in-between values
+        unique_values = torch.unique(result.masks)
+        assert all(v in {0, 1} for v in unique_values.tolist())
+
+    # ==================== Edge Cases ====================
+
+    def test_resize_empty_bboxes(self) -> None:
+        """Test resize with no bounding boxes."""
+        img_size = (100, 100)
+        entity = DetectionSample(
+            image=tv_tensors.Image(torch.randint(0, 256, (3, *img_size), dtype=torch.uint8)),
+            dm_image_info=DmImageInfo(height=img_size[0], width=img_size[1]),
+            bboxes=tv_tensors.BoundingBoxes(  # type: ignore[call-overload]
+                torch.empty((0, 4), dtype=torch.float32),
+                format=tv_tensors.BoundingBoxFormat.XYXY,
+                canvas_size=img_size,
+            ),
+            label=LongTensor([]),
+        )
+        resize = Resize(size=(64, 64), keep_aspect_ratio=True)
+
+        result = resize(entity)
+
+        assert result.image.shape[-2:] == (64, 64)
+        assert len(result.bboxes) == 0
+
+    def test_resize_empty_masks(self) -> None:
+        """Test resize with empty masks."""
+        img_size = (100, 100)
+        entity = InstanceSegmentationSample(
+            image=tv_tensors.Image(torch.randint(0, 256, (3, *img_size), dtype=torch.uint8)),
+            dm_image_info=DmImageInfo(height=img_size[0], width=img_size[1]),
+            bboxes=tv_tensors.BoundingBoxes(  # type: ignore[call-overload]
+                torch.tensor([[10, 10, 50, 50]], dtype=torch.float32),
+                format=tv_tensors.BoundingBoxFormat.XYXY,
+                canvas_size=img_size,
+            ),
+            label=LongTensor([0]),
+            masks=tv_tensors.Mask(torch.empty((0, *img_size), dtype=torch.uint8)),
+        )
+        resize = Resize(size=(64, 64), keep_aspect_ratio=True)
+
+        result = resize(entity)
+
+        assert result.image.shape[-2:] == (64, 64)
+        assert result.masks.shape[0] == 0
+
+    def test_resize_single_int_size(self) -> None:
+        """Test that single int size is converted to tuple."""
+        resize = Resize(size=64, keep_aspect_ratio=True)
+        assert resize.size == (64, 64)
+
+    def test_resize_tensor_directly(self) -> None:
+        """Test resizing a tensor directly (fallback path)."""
+        tensor = torch.randint(0, 256, (3, 100, 100), dtype=torch.uint8)
+        resize = Resize(size=(64, 64), keep_aspect_ratio=False)
+
+        result = resize(tensor)
+
+        assert result.shape[-2:] == (64, 64)
+
+    # ==================== Consistency Tests ====================
+
+    def test_bbox_inside_image_after_resize(self, square_image_entity: InstanceSegmentationSample) -> None:
+        """Test that all bboxes remain inside image bounds after resize."""
+        resize = Resize(size=(64, 64), resize_targets=True, keep_aspect_ratio=True)
+        entity = deepcopy(square_image_entity)
+
+        result = resize(entity)
+
+        h, w = result.image.shape[-2:]
+        # All bbox coordinates should be within [0, w] for x and [0, h] for y
+        assert torch.all(result.bboxes[:, 0] >= 0)
+        assert torch.all(result.bboxes[:, 1] >= 0)
+        assert torch.all(result.bboxes[:, 2] <= w)
+        assert torch.all(result.bboxes[:, 3] <= h)
+
+    def test_mask_same_size_as_image(self, square_image_entity: InstanceSegmentationSample) -> None:
+        """Test that masks have same spatial size as image after resize."""
+        resize = Resize(size=(64, 64), resize_targets=True, keep_aspect_ratio=True)
+        entity = deepcopy(square_image_entity)
+
+        result = resize(entity)
+
+        assert result.masks.shape[-2:] == result.image.shape[-2:]
+
+    def test_img_info_updated(self, square_image_entity: InstanceSegmentationSample) -> None:
+        """Test that img_info is correctly updated after resize."""
+        resize = Resize(size=(64, 64), resize_targets=True, keep_aspect_ratio=True)
+        entity = deepcopy(square_image_entity)
+
+        result = resize(entity)
+
+        assert result.img_info.img_shape == (64, 64)
+
+    def test_scale_factor_stored(self, wide_image_entity: InstanceSegmentationSample) -> None:
+        """Test that scale factor is stored in img_info when using aspect ratio mode."""
+        resize = Resize(size=(128, 128), resize_targets=True, keep_aspect_ratio=True)
+        entity = deepcopy(wide_image_entity)
+        orig_h, orig_w = entity.image.shape[-2:]
+
+        result = resize(entity)
+
+        # Scale factor should be stored
+        assert hasattr(result.img_info, "scale_factor")
+        expected_scale = min(128 / orig_w, 128 / orig_h)
+        assert abs(result.img_info.scale_factor[0] - expected_scale) < 0.01
+        assert abs(result.img_info.scale_factor[1] - expected_scale) < 0.01
diff --git a/library/tests/unit/data/dataset/test_base.py b/library/tests/unit/data/dataset/test_base.py
index 75cac64693d..b7a5fc2c43f 100644
--- a/library/tests/unit/data/dataset/test_base.py
+++ b/library/tests/unit/data/dataset/test_base.py
@@ -1,10 +1,11 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Unit tests for base_new OTXDataset."""
 
 from __future__ import annotations
 
+from functools import partial
 from unittest.mock import Mock, patch
 
 import pytest
@@ -48,7 +49,7 @@ def test_collate_with_torch_tensors(self):
         assert result.labels == [torch.tensor(0), torch.tensor(1)]
 
     def test_collate_with_different_image_shapes(self):
-        """Test collating items with different image shapes."""
+        """Test collating items with different image shapes raises RuntimeError."""
         sample1 = Mock(spec=OTXSample)
         sample1.image = torch.randn(3, 224, 224)
         sample1.label = None
@@ -66,12 +67,36 @@ def test_collate_with_different_image_shapes(self):
         sample2.img_info = None
 
         items = [sample1, sample2]
-        result = _default_collate_fn(items)
-
-        # When shapes are different, should return list instead of stacked tensor
-        assert isinstance(result.images, list)
-        assert len(result.images) == 2
-        assert result.labels is None
+        # torch.stack requires same-size tensors; different shapes mean
+        # the resize/augmentation pipeline is misconfigured.
+        with pytest.raises(RuntimeError, match="stack expects each tensor to be equal size"):
+            _default_collate_fn(items)
+
+    def test_collate_rejects_unprocessed_16bit_images(self):
+        """Test that int32 tensors (simulating unprocessed 16-bit images) are rejected."""
+        sample = Mock(spec=OTXSample)
+        sample.image = torch.randint(0, 65536, (3, 32, 32), dtype=torch.int32)
+        sample.label = torch.tensor(0)
+        sample.masks = None
+        sample.bboxes = None
+        sample.keypoints = None
+        sample.img_info = None
+
+        with pytest.raises(TypeError, match="high-bit-depth image"):
+            _default_collate_fn([sample])
+
+    def test_collate_rejects_int16_images(self):
+        """Test that int16 tensors (unprocessed signed 16-bit) are rejected."""
+        sample = Mock(spec=OTXSample)
+        sample.image = torch.randint(-1000, 1000, (3, 32, 32), dtype=torch.int16)
+        sample.label = torch.tensor(0)
+        sample.masks = None
+        sample.bboxes = None
+        sample.keypoints = None
+        sample.img_info = None
+
+        with pytest.raises(TypeError, match="high-bit-depth image"):
+            _default_collate_fn([sample])
 
 
 class TestOTXDataset:
@@ -95,17 +120,17 @@ def setup_method(self):
 
     def test_apply_transforms_with_compose(self):
         """Test _apply_transforms with Compose transforms."""
-        from otx.data.transform_libs.torchvision import Compose
+        from torchvision.transforms.v2 import Compose
 
         mock_compose = Mock(spec=Compose)
         mock_entity = Mock(spec=OTXSample)
+        mock_entity.image = torch.rand(3, 32, 32, dtype=torch.float32)
         mock_result = Mock()
         mock_compose.return_value = mock_result
 
         dataset = OTXDataset(
             dm_subset=self.mock_dm_subset,
             transforms=mock_compose,
-            to_tv_image=True,
         )
 
         result = dataset._apply_transforms(mock_entity)
@@ -117,6 +142,7 @@ def test_apply_transforms_with_callable(self):
         """Test _apply_transforms with callable transform."""
         mock_transform = Mock()
         mock_entity = Mock(spec=OTXSample)
+        mock_entity.image = torch.rand(3, 32, 32, dtype=torch.float32)
         mock_result = Mock()
         mock_transform.return_value = mock_result
 
@@ -136,6 +162,7 @@ def test_apply_transforms_with_list(self):
         transform2 = Mock()
 
         mock_entity = Mock(spec=OTXSample)
+        mock_entity.image = torch.rand(3, 32, 32, dtype=torch.float32)
         intermediate_result = Mock()
         final_result = Mock()
 
@@ -159,6 +186,7 @@ def test_apply_transforms_with_list_returns_none(self):
         transform2 = Mock()
 
         mock_entity = Mock(spec=OTXSample)
+        mock_entity.image = torch.rand(3, 32, 32, dtype=torch.float32)
         transform1.return_value = None  # First transform returns None
 
         dataset = OTXDataset(
@@ -180,6 +208,7 @@ def test_iterable_transforms_with_non_list(self):
         )
 
         mock_entity = Mock(spec=OTXSample)
+        mock_entity.image = torch.rand(3, 32, 32, dtype=torch.float32)
         dataset.transforms = "not_a_list"  # String is iterable but not a list
 
         with pytest.raises(TypeError):
@@ -191,6 +220,7 @@ def test_getitem_success(self):
         self.mock_dm_subset.__getitem__ = Mock(return_value=mock_item)
 
         mock_transformed_item = Mock(spec=OTXSample)
+        mock_transformed_item.image = torch.rand(3, 32, 32)
 
         dataset = OTXDataset(
             dm_subset=self.mock_dm_subset,
@@ -224,10 +254,13 @@ def test_getitem_with_refetch(self):
             assert dataset._apply_transforms.call_count == 2
 
     def test_collate_fn_property(self):
-        """Test collate_fn property returns _default_collate_fn."""
+        """Test collate_fn property returns a partial wrapping _default_collate_fn."""
         dataset = OTXDataset(
             dm_subset=self.mock_dm_subset,
             transforms=self.mock_transforms,
         )
 
-        assert dataset.collate_fn == _default_collate_fn
+        collate = dataset.collate_fn
+        assert isinstance(collate, partial)
+        assert collate.func is _default_collate_fn
+        assert collate.keywords.get("stack_images") is True
diff --git a/library/tests/unit/data/dataset/test_detection.py b/library/tests/unit/data/dataset/test_detection.py
index 43ccc6f25e2..ee959e9439b 100644
--- a/library/tests/unit/data/dataset/test_detection.py
+++ b/library/tests/unit/data/dataset/test_detection.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Unit tests for detection dataset."""
@@ -10,7 +10,6 @@
 from datumaro.experimental import Dataset
 
 from otx.data.dataset.detection import OTXDetectionDataset
-from otx.data.entity.sample import DetectionSample
 
 
 class TestOTXDetectionDataset:
@@ -33,15 +32,6 @@ def setup_method(self):
 
         self.mock_transforms = Mock()
 
-    def test_init_sets_sample_type(self):
-        """Test that initialization sets sample_type to DetectionSample."""
-        dataset = OTXDetectionDataset(
-            dm_subset=self.mock_dm_subset,
-            transforms=self.mock_transforms,
-        )
-
-        assert dataset.sample_type == DetectionSample
-
     def test_get_idx_list_per_classes_multiple_classes_per_item(self):
         """Test get_idx_list_per_classes with multiple classes per item."""
         # Mock dataset items with multiple labels per item
diff --git a/library/tests/unit/data/dataset/test_detection_aug_switch.py b/library/tests/unit/data/dataset/test_detection_aug_switch.py
index 5d03550bcf1..ee8215c8c92 100644
--- a/library/tests/unit/data/dataset/test_detection_aug_switch.py
+++ b/library/tests/unit/data/dataset/test_detection_aug_switch.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 """Integration tests for OTXDetectionDataset with DataAugSwitchMixin."""
@@ -7,8 +7,6 @@
 from unittest.mock import MagicMock, patch
 
 import pytest
-import torch
-from torchvision.transforms.v2 import Compose, ToDtype
 
 from otx.backend.native.callbacks.aug_scheduler import DataAugSwitch
 from otx.data.dataset.detection import OTXDetectionDataset
@@ -23,30 +21,37 @@ def sample_policies(self):
         """Create sample augmentation policies."""
         return {
             "no_aug": {
-                "to_tv_image": True,
-                "transforms": [
-                    {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.float32"}},
+                "augmentations_cpu": [
+                    {
+                        "class_path": "otx.data.augmentation.transforms.Resize",
+                        "init_args": {"size": [640, 640], "keep_aspect_ratio": False},
+                    },
                 ],
             },
             "strong_aug_1": {
-                "to_tv_image": True,
-                "transforms": [
-                    {"class_path": "torchvision.transforms.v2.RandomZoomOut"},
-                    {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.float32"}},
+                "augmentations_cpu": [
+                    {"class_path": "torchvision.transforms.v2.RandomHorizontalFlip", "init_args": {"p": 0.5}},
+                    {
+                        "class_path": "otx.data.augmentation.transforms.Resize",
+                        "init_args": {"size": [640, 640], "keep_aspect_ratio": False},
+                    },
                 ],
             },
             "strong_aug_2": {
-                "to_tv_image": False,
-                "transforms": [
-                    {"class_path": "otx.data.transform_libs.torchvision.YOLOXHSVRandomAug"},
-                    {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.int32"}},
+                "augmentations_cpu": [
+                    {"class_path": "torchvision.transforms.v2.RandomVerticalFlip", "init_args": {"p": 0.5}},
+                    {
+                        "class_path": "otx.data.augmentation.transforms.Resize",
+                        "init_args": {"size": [640, 640], "keep_aspect_ratio": False},
+                    },
                 ],
             },
             "light_aug": {
-                "to_tv_image": True,
-                "transforms": [
-                    {"class_path": "torchvision.transforms.v2.RandomPhotometricDistort"},
-                    {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": "torch.float32"}},
+                "augmentations_cpu": [
+                    {
+                        "class_path": "otx.data.augmentation.transforms.Resize",
+                        "init_args": {"size": [640, 640], "keep_aspect_ratio": False},
+                    },
                 ],
             },
         }
@@ -54,12 +59,10 @@ def sample_policies(self):
     @pytest.fixture
     def data_aug_switch(self, sample_policies):
         """Create a DataAugSwitch instance."""
-        with patch("otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate") as mock_generate:
-            mock_generate.return_value = Compose([ToDtype(dtype=torch.float32)])
-            switch = DataAugSwitch([4, 29, 50], sample_policies)
-            shared_epoch = Value("i", 0)
-            switch.set_shared_epoch(shared_epoch)
-            return switch
+        switch = DataAugSwitch([4, 29], sample_policies)
+        shared_epoch = Value("i", 0)
+        switch.set_shared_epoch(shared_epoch)
+        return switch
 
     @pytest.fixture
     def mock_dm_subset(self):
@@ -169,6 +172,8 @@ def test_augmentation_switch_integration_light_aug_stage(self, detection_dataset
 
     def test_transforms_updated_correctly(self, detection_dataset, data_aug_switch):
         """Test that transforms are updated correctly when epoch changes."""
+        from otx.data.augmentation import CPUAugmentationPipeline
+
         detection_dataset.set_data_aug_switch(data_aug_switch)
 
         # Test different epochs and verify transforms update
@@ -187,23 +192,19 @@ def test_transforms_updated_correctly(self, detection_dataset, data_aug_switch):
             else:
                 assert policy_name == expected_policy_type
 
-            assert detection_dataset.to_tv_image == data_aug_switch.policies[policy_name]["to_tv_image"]
-            assert detection_dataset.transforms == data_aug_switch.policies[policy_name]["transforms"], (
-                f"transforms should be {data_aug_switch.policies[policy_name]['transforms']} but is {detection_dataset.transforms}"
-            )
+            # Verify that the dataset's transforms attribute is now a CPUAugmentationPipeline
+            assert isinstance(detection_dataset.transforms, CPUAugmentationPipeline)
 
     def test_detection_dataset_without_aug_switch(self, detection_dataset):
         """Test that detection dataset works normally without augmentation switch."""
 
         # Store original transforms
-        original_to_tv_image = detection_dataset.to_tv_image
         original_transforms = detection_dataset.transforms
 
         # Apply augmentation switch (should do nothing)
         detection_dataset._apply_augmentation_switch()
 
         # Verify nothing changed
-        assert detection_dataset.to_tv_image == original_to_tv_image
         assert detection_dataset.transforms == original_transforms
 
     def test_epoch_boundary_conditions(self, detection_dataset, data_aug_switch):
@@ -256,16 +257,13 @@ def test_multiple_datasets_same_switch(self, mock_dm_subset, data_aug_switch):
 
     def test_error_handling_without_shared_epoch(self, detection_dataset, sample_policies):
         """Test error handling when DataAugSwitch doesn't have shared epoch set."""
-        with patch("otx.data.transform_libs.torchvision.TorchVisionTransformLib.generate") as mock_generate:
-            mock_generate.return_value = Compose([ToDtype(dtype=torch.float32)])
-
-            # Create switch without shared epoch
-            switch = DataAugSwitch([4, 29, 50], sample_policies)
-            detection_dataset.set_data_aug_switch(switch)
+        # Create switch without shared epoch
+        switch = DataAugSwitch([4, 29], sample_policies)
+        detection_dataset.set_data_aug_switch(switch)
 
-            # This should raise an error when trying to access current_policy_name
-            with pytest.raises(ValueError, match="Shared epoch not set"):
-                detection_dataset._apply_augmentation_switch()
+        # This should raise an error when trying to access current_policy_name
+        with pytest.raises(ValueError, match="Shared epoch not set"):
+            detection_dataset._apply_augmentation_switch()
 
     def test_type_annotations_compatibility(self, detection_dataset):
         """Test that type annotations work correctly with mixin."""
@@ -279,16 +277,9 @@ def test_type_annotations_compatibility(self, detection_dataset):
         assert isinstance(detection_dataset.has_dynamic_augmentation, bool)
 
     def test_transforms_pipeline_switch(self, detection_dataset, data_aug_switch, mocker):
-        """Test that augmentation switch is triggered during data retrieval."""
-        switcher = mocker.patch("otx.data.dataset.detection.OTXDetectionDataset._apply_augmentation_switch")
+        """Test that augmentation switch is properly set up on detection dataset."""
         detection_dataset.set_data_aug_switch(data_aug_switch)
-        detection_dataset.augmentations = Compose(
-            [
-                ToDtype(dtype=torch.float32),
-            ]
-        )
 
-        item = next(iter(detection_dataset))
-        # Ensure that the item is processed without errors
-        assert item is not None
-        assert switcher.called
+        # Verify that the augmentation switch is set
+        assert detection_dataset.data_aug_switch is data_aug_switch
+        assert detection_dataset.has_dynamic_augmentation is True
diff --git a/library/tests/unit/data/dataset/test_mixins.py b/library/tests/unit/data/dataset/test_mixins.py
index 6a1fa12792e..d7f18b8cea0 100644
--- a/library/tests/unit/data/dataset/test_mixins.py
+++ b/library/tests/unit/data/dataset/test_mixins.py
@@ -1,147 +1,131 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-"""Tests for dataset mixins."""
+"""Tests for dataset mixins (CPU/GPU pipeline architecture)."""
 
+from __future__ import annotations
+
+from typing import Any
 from unittest.mock import MagicMock
 
 import pytest
-import torch
-from torchvision.transforms.v2 import Compose, ToDtype
 
+from otx.backend.native.callbacks.aug_scheduler import DataAugSwitch
+from otx.data.augmentation import CPUAugmentationPipeline
 from otx.data.dataset.mixins import DataAugSwitchMixin
-from otx.data.entity.sample import OTXSample
 
 
 class MockDataset(DataAugSwitchMixin):
     """Mock dataset class for testing the mixin."""
 
-    def __init__(self, *args, **kwargs):
-        self.to_tv_image = True
-        self.transforms = None
-
-    def _apply_transforms(self, entity: OTXSample) -> OTXSample:
-        return entity
+    def __init__(self):
+        self.transforms: Any = None
 
 
 class TestDataAugSwitchMixin:
-    """Test cases for DataAugSwitchMixin."""
+    """Test cases for DataAugSwitchMixin with CPU/GPU pipeline."""
 
     @pytest.fixture
-    def mock_dataset(self):
-        """Create a mock dataset with the mixin."""
+    def dataset(self):
         return MockDataset()
 
     @pytest.fixture
-    def mock_data_aug_switch(self):
-        """Create a mock DataAugSwitch."""
-        mock_switch = MagicMock()
-        mock_transforms = Compose([ToDtype(dtype=torch.float32)])
-        mock_switch.current_transforms = (True, mock_transforms)
-        return mock_switch
-
-    @pytest.fixture
-    def mock_entity(self):
-        """Create a mock OTXSample."""
-        return MagicMock(spec=OTXSample)
-
-    def test_lazy_initialization(self, mock_dataset):
-        """Test that mixin initializes lazily."""
-        # Initially, the attribute shouldn't exist
-        assert not hasattr(mock_dataset, "data_aug_switch")
-
-        # After calling has_dynamic_augmentation, it should be initialized
-        assert not mock_dataset.has_dynamic_augmentation
-        assert hasattr(mock_dataset, "data_aug_switch")
-        assert mock_dataset.data_aug_switch is None
-
-    def test_set_data_aug_switch(self, mock_dataset, mock_data_aug_switch):
-        """Test setting data augmentation switch."""
-        mock_dataset.set_data_aug_switch(mock_data_aug_switch)
-        assert mock_dataset.data_aug_switch is mock_data_aug_switch
-
-    def test_has_dynamic_augmentation_false_when_none(self, mock_dataset):
-        """Test has_dynamic_augmentation returns False when no switch is set."""
-        assert not mock_dataset.has_dynamic_augmentation
-
-    def test_has_dynamic_augmentation_true_when_set(self, mock_dataset, mock_data_aug_switch):
-        """Test has_dynamic_augmentation returns True when switch is set."""
-        mock_dataset.set_data_aug_switch(mock_data_aug_switch)
-        assert mock_dataset.has_dynamic_augmentation
-
-    def test_apply_augmentation_switch_with_switch(self, mock_dataset, mock_data_aug_switch, mock_entity):
-        """Test _apply_augmentation_switch when switch is set."""
-        mock_dataset.set_data_aug_switch(mock_data_aug_switch)
-
-        policy_name = mock_dataset._apply_augmentation_switch()
-
-        assert mock_dataset.to_tv_image is mock_data_aug_switch.policies[policy_name]["to_tv_image"]
-        assert mock_dataset.transforms is mock_data_aug_switch.policies[policy_name]["transforms"]
+    def mock_switch(self):
+        """Mock built with spec=DataAugSwitch; its current policy name is 'no_aug'."""
+        s = MagicMock(spec=DataAugSwitch)
+        s.current_policy_name = "no_aug"
+        return s
 
-    def test_apply_augmentation_switch_updates_transforms(self, mock_dataset, mock_entity):
-        """Test that augmentation switch properly updates transforms."""
-        # Create a mock switch with specific transforms
-        mock_switch = MagicMock()
-        new_transforms = Compose([ToDtype(dtype=torch.int32)])
-        mock_switch.current_transforms = (False, new_transforms)
+    # -- lazy init -------------------------------------------------------
 
-        mock_dataset.set_data_aug_switch(mock_switch)
-        policy_name = mock_dataset._apply_augmentation_switch()
-
-        assert mock_dataset.to_tv_image is mock_switch.policies[policy_name]["to_tv_image"]
-        assert mock_dataset.transforms is mock_switch.policies[policy_name]["transforms"]
-
-    def test_multiple_switch_updates(self, mock_dataset):
-        """Test multiple updates to the augmentation switch."""
-        # First switch
-        mock_switch1 = MagicMock()
-        transforms1 = Compose([ToDtype(dtype=torch.float32)])
-        mock_switch1.current_transforms = (True, transforms1)
-
-        mock_dataset.set_data_aug_switch(mock_switch1)
-        policy_name = mock_dataset._apply_augmentation_switch()
-
-        assert mock_dataset.to_tv_image is mock_switch1.policies[policy_name]["to_tv_image"]
-        assert mock_dataset.transforms is mock_switch1.policies[policy_name]["transforms"]
-
-        # Second switch
-        mock_switch2 = MagicMock()
-        transforms2 = Compose([ToDtype(dtype=torch.int32)])
-        mock_switch2.current_transforms = (False, transforms2)
-
-        mock_dataset.set_data_aug_switch(mock_switch2)
-        policy_name = mock_dataset._apply_augmentation_switch()
+    def test_lazy_initialization(self, dataset):
+        """Attribute should not exist until first access."""
+        assert not hasattr(dataset, "data_aug_switch")
+        assert not dataset.has_dynamic_augmentation
+        assert hasattr(dataset, "data_aug_switch")
+        assert dataset.data_aug_switch is None
 
-        assert mock_dataset.to_tv_image is mock_switch2.policies[policy_name]["to_tv_image"]
-        assert mock_dataset.transforms is mock_switch2.policies[policy_name]["transforms"]
+    # -- set_data_aug_switch --------------------------------------------
 
-    def test_has_dynamic_augmentation_property_edge_cases(self):
-        """Test edge cases for has_dynamic_augmentation property."""
+    def test_set_data_aug_switch(self, dataset, mock_switch):
+        dataset.set_data_aug_switch(mock_switch)
+        assert dataset.data_aug_switch is mock_switch
 
-        # Dataset without the attribute (should be lazily initialized)
-        class DatasetWithoutSwitch:
-            pass
+    def test_set_data_aug_switch_replaces(self, dataset, mock_switch):
+        dataset.set_data_aug_switch(mock_switch)
+        new_switch = MagicMock(spec=DataAugSwitch)
+        dataset.set_data_aug_switch(new_switch)
+        assert dataset.data_aug_switch is new_switch
 
-        dataset = DatasetWithoutSwitch()
-        dataset._ensure_data_aug_switch_initialized = DataAugSwitchMixin._ensure_data_aug_switch_initialized.__get__(
-            dataset,
-        )
-        dataset.has_dynamic_augmentation = DataAugSwitchMixin.has_dynamic_augmentation.__get__(dataset)
+    # -- has_dynamic_augmentation ---------------------------------------
 
+    def test_has_dynamic_false_when_none(self, dataset):
         assert not dataset.has_dynamic_augmentation
-        # After calling has_dynamic_augmentation, the attribute should be initialized
-        assert hasattr(dataset, "data_aug_switch")
-        assert dataset.data_aug_switch is None
 
-        # Dataset with None value
-        class DatasetWithNoneSwitch:
+    def test_has_dynamic_true_when_set(self, dataset, mock_switch):
+        dataset.set_data_aug_switch(mock_switch)
+        assert dataset.has_dynamic_augmentation
+
+    # -- _apply_augmentation_switch -------------------------------------
+
+    def test_apply_returns_none_when_no_switch(self, dataset):
+        result = dataset._apply_augmentation_switch()
+        assert result is None
+        assert dataset.transforms is None
+
+    def test_apply_sets_transforms_to_cpu_pipeline(self, dataset, mock_switch):
+        expected_pipeline = MagicMock(spec=CPUAugmentationPipeline)
+        mock_switch.get_cpu_pipeline.return_value = expected_pipeline
+        dataset.set_data_aug_switch(mock_switch)
+        policy = dataset._apply_augmentation_switch()
+        assert policy == "no_aug"
+        assert dataset.transforms is expected_pipeline
+        mock_switch.get_cpu_pipeline.assert_called_once_with("no_aug")
+
+    def test_apply_follows_policy_changes(self, dataset, mock_switch):
+        dataset.set_data_aug_switch(mock_switch)
+
+        # First call → no_aug
+        pipeline_no_aug = MagicMock(spec=CPUAugmentationPipeline)
+        mock_switch.current_policy_name = "no_aug"
+        mock_switch.get_cpu_pipeline.return_value = pipeline_no_aug
+        dataset._apply_augmentation_switch()
+        assert dataset.transforms is pipeline_no_aug
+
+        # Policy changes → strong_aug_1
+        pipeline_strong = MagicMock(spec=CPUAugmentationPipeline)
+        mock_switch.current_policy_name = "strong_aug_1"
+        mock_switch.get_cpu_pipeline.return_value = pipeline_strong
+        policy = dataset._apply_augmentation_switch()
+        assert policy == "strong_aug_1"
+        assert dataset.transforms is pipeline_strong
+
+    def test_apply_does_not_touch_to_tv_image(self, dataset, mock_switch):
+        """to_tv_image should NOT be mutated — GPU pipeline handles normalization."""
+        dataset.to_tv_image = True
+        dataset.set_data_aug_switch(mock_switch)
+        dataset._apply_augmentation_switch()
+        assert dataset.to_tv_image is True
+
+    # -- edge cases -----------------------------------------------------
+
+    def test_mixin_on_plain_class(self):
+        """Mixin works even on a plain class that doesn't inherit OTXDataset."""
+
+        class PlainDataset(DataAugSwitchMixin):
             def __init__(self):
-                self.data_aug_switch = None
+                self.transforms: Any = None
+
+        ds = PlainDataset()
+        assert not ds.has_dynamic_augmentation
 
-        dataset2 = DatasetWithNoneSwitch()
-        dataset2._ensure_data_aug_switch_initialized = DataAugSwitchMixin._ensure_data_aug_switch_initialized.__get__(
-            dataset2,
-        )
-        dataset2.has_dynamic_augmentation = DataAugSwitchMixin.has_dynamic_augmentation.__get__(dataset2)
+        mock = MagicMock(spec=DataAugSwitch)
+        mock.current_policy_name = "light_aug"
+        expected_pipeline = MagicMock(spec=CPUAugmentationPipeline)
+        mock.get_cpu_pipeline.return_value = expected_pipeline
+        ds.set_data_aug_switch(mock)
 
-        assert not dataset2.has_dynamic_augmentation
+        assert ds.has_dynamic_augmentation
+        policy = ds._apply_augmentation_switch()
+        assert policy == "light_aug"
+        assert ds.transforms is expected_pipeline
diff --git a/library/tests/unit/data/entity/test_torch.py b/library/tests/unit/data/entity/test_torch.py
index c1b2aebd274..88a0a959f17 100644
--- a/library/tests/unit/data/entity/test_torch.py
+++ b/library/tests/unit/data/entity/test_torch.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 """Unit tests of sample batch data entity."""
 
@@ -8,17 +8,19 @@
 from torch import LongTensor
 from torchvision import tv_tensors
 
+from otx.data.dataset.base import _default_collate_fn
 from otx.data.entity.base import ImageInfo
-from otx.data.entity.sample import OTXSample, OTXSampleBatch, collate_fn
+from otx.data.entity.sample import OTXSample, OTXSampleBatch
 
 
 class TestCollateFn:
     def test_collate_fn(self) -> None:
-        """Test collate_fn function."""
+        """Test _default_collate_fn function."""
         # Create mock samples with required attributes
         samples = []
         for i in range(3):
             sample = Mock(spec=OTXSample)
+            # Use float32 images since _default_collate_fn expects tensors
             sample.image = tv_tensors.Image(torch.randn(3, 224, 224))
             sample.img_info = ImageInfo(img_idx=i, img_shape=(224, 224), ori_shape=(224, 224))
             sample.bboxes = tv_tensors.BoundingBoxes(
@@ -31,7 +33,7 @@ def test_collate_fn(self) -> None:
             sample.keypoints = None
             samples.append(sample)
 
-        data_batch = collate_fn(samples)
+        data_batch = _default_collate_fn(samples)
         assert len(data_batch.imgs_info) == len(data_batch.images)
         assert isinstance(data_batch, OTXSampleBatch)
         for field in OTXSampleBatch.__dataclass_fields__:
diff --git a/library/tests/unit/data/entity/test_utils.py b/library/tests/unit/data/entity/test_utils.py
index e90848251d7..fff0bdae2d9 100644
--- a/library/tests/unit/data/entity/test_utils.py
+++ b/library/tests/unit/data/entity/test_utils.py
@@ -1,32 +1,30 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 """Unit tests of utils for data entity."""
 
 import torch
 
-from otx.data.entity import ImageInfo, OTXSampleBatch
+from otx.data.entity import ImageInfo
 from otx.data.entity.utils import stack_batch
 
 
 def test_stack_batch():
-    # Create a sample entity with tensor images
-    entity = OTXSampleBatch(
-        images=[
-            torch.tensor([[[1, 2], [3, 4], [5, 6]]], dtype=torch.uint8),
-            torch.tensor([[[5, 6, 7], [8, 9, 10]]], dtype=torch.uint8),
-            torch.tensor([[[11, 12, 13, 14], [15, 16, 17, 18]]], dtype=torch.uint8),
-            torch.tensor([[[19, 20, 0], [0, 0, 0]]], dtype=torch.uint8),
-        ],
-        imgs_info=[
-            ImageInfo(img_shape=(3, 2), img_idx=0, ori_shape=(2, 2)),
-            ImageInfo(img_shape=(2, 3), img_idx=1, ori_shape=(2, 3)),
-            ImageInfo(img_shape=(2, 4), img_idx=2, ori_shape=(2, 4)),
-            ImageInfo(img_shape=(2, 3), img_idx=3, ori_shape=(1, 2), padding=(0, 0, 1, 1)),  # previously padded image
-        ],
-    )
+    # Create raw image tensors and image info for testing stack_batch directly
+    images = [
+        torch.tensor([[[1, 2], [3, 4], [5, 6]]], dtype=torch.uint8),
+        torch.tensor([[[5, 6, 7], [8, 9, 10]]], dtype=torch.uint8),
+        torch.tensor([[[11, 12, 13, 14], [15, 16, 17, 18]]], dtype=torch.uint8),
+        torch.tensor([[[19, 20, 0], [0, 0, 0]]], dtype=torch.uint8),
+    ]
+    imgs_info = [
+        ImageInfo(img_shape=(3, 2), img_idx=0, ori_shape=(2, 2)),
+        ImageInfo(img_shape=(2, 3), img_idx=1, ori_shape=(2, 3)),
+        ImageInfo(img_shape=(2, 4), img_idx=2, ori_shape=(2, 4)),
+        ImageInfo(img_shape=(2, 3), img_idx=3, ori_shape=(1, 2), padding=(0, 0, 1, 1)),  # previously padded image
+    ]
 
     # Call the stack_batch function
-    stacked_images, batch_info = stack_batch(entity.images, entity.imgs_info, pad_size_divisor=1, pad_value=0)
+    stacked_images, batch_info = stack_batch(images, imgs_info, pad_size_divisor=1, pad_value=0)
 
     # Assert the output
     assert len(stacked_images) == 4
diff --git a/library/tests/unit/data/test_factory.py b/library/tests/unit/data/test_factory.py
index a3baa8b1cd6..9782b6a6c14 100644
--- a/library/tests/unit/data/test_factory.py
+++ b/library/tests/unit/data/test_factory.py
@@ -1,8 +1,11 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 """Test Factory classes for dataset and transforms."""
 
+from unittest.mock import MagicMock, PropertyMock
+
+import polars as pl
 import pytest
 from datumaro.experimental import Dataset
 
@@ -17,24 +20,7 @@
 from otx.data.dataset.instance_segmentation import OTXInstanceSegDataset
 from otx.data.dataset.segmentation import OTXSegmentationDataset
 from otx.data.factory import OTXDatasetFactory, TransformLibFactory
-from otx.data.transform_libs.torchvision import TorchVisionTransformLib
 from otx.types.task import OTXTaskType
-from otx.types.transformer_libs import TransformLibType
-
-lib_type_parameters = [(TransformLibType.TORCHVISION, TorchVisionTransformLib)]
-
-
-class TestTransformLibFactory:
-    @pytest.mark.parametrize(
-        ("lib_type", "lib"),
-        lib_type_parameters,
-    )
-    def test_generate(self, lib_type, lib, mocker) -> None:
-        mock_generate = mocker.patch.object(lib, "generate")
-        config = mocker.MagicMock(spec=SubsetConfig)
-        config.transform_lib_type = lib_type
-        _ = TransformLibFactory.generate(config)
-        mock_generate.assert_called_once_with(config)
 
 
 class TestOTXDatasetFactory:
@@ -78,3 +64,64 @@ def test_create(
             ),
             dataset_cls,
         )
+
+
+class TestDetectStorageDtype:
+    """Tests for OTXDatasetFactory._detect_storage_dtype."""
+
+    def test_schema_uint16(self):
+        """Schema-declared UInt16 dtype → 'uint16'."""
+        mock_subset = MagicMock(spec=Dataset)
+        # Make iteration raise StopIteration (empty dataset)
+        mock_subset.__iter__ = MagicMock(return_value=iter([]))
+        mock_field = MagicMock()
+        mock_field.dtype = pl.UInt16
+        mock_img_attr = MagicMock()
+        mock_img_attr.field = mock_field
+        mock_schema = MagicMock()
+        mock_schema.attributes = {"image": mock_img_attr}
+        type(mock_subset).schema = PropertyMock(return_value=mock_schema)
+
+        assert OTXDatasetFactory._detect_storage_dtype(mock_subset) == "uint16"
+
+    def test_schema_float32(self):
+        """Schema-declared Float32 dtype → 'float32'."""
+        mock_subset = MagicMock(spec=Dataset)
+        mock_subset.__iter__ = MagicMock(return_value=iter([]))
+        mock_field = MagicMock()
+        mock_field.dtype = pl.Float32
+        mock_img_attr = MagicMock()
+        mock_img_attr.field = mock_field
+        mock_schema = MagicMock()
+        mock_schema.attributes = {"image": mock_img_attr}
+        type(mock_subset).schema = PropertyMock(return_value=mock_schema)
+
+        assert OTXDatasetFactory._detect_storage_dtype(mock_subset) == "float32"
+
+    def test_schema_unknown_defaults_uint8(self):
+        """Unknown/missing schema dtype → default 'uint8'."""
+        mock_subset = MagicMock(spec=Dataset)
+        mock_subset.__iter__ = MagicMock(return_value=iter([]))
+        mock_schema = MagicMock()
+        mock_schema.attributes = {}
+        type(mock_subset).schema = PropertyMock(return_value=mock_schema)
+
+        assert OTXDatasetFactory._detect_storage_dtype(mock_subset) == "uint8"
+
+    def test_file_based_detection(self, tmp_path):
+        """File-header probing detects uint8 from a real PNG."""
+        import numpy as np
+        from PIL import Image as PILImage
+
+        # Create a small uint8 PNG
+        img_path = tmp_path / "test.png"
+        PILImage.fromarray(np.zeros((4, 4, 3), dtype=np.uint8)).save(img_path)
+
+        mock_media = MagicMock()
+        mock_media.path = str(img_path)
+        mock_item = MagicMock()
+        mock_item.media = mock_media
+        mock_subset = MagicMock(spec=Dataset)
+        mock_subset.__iter__ = MagicMock(return_value=iter([mock_item]))
+
+        assert OTXDatasetFactory._detect_storage_dtype(mock_subset) == "uint8"
diff --git a/library/tests/unit/data/test_module.py b/library/tests/unit/data/test_module.py
index a73b4f5f64e..deed0b519b0 100644
--- a/library/tests/unit/data/test_module.py
+++ b/library/tests/unit/data/test_module.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2023-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
@@ -22,7 +22,6 @@
     OTXDataModule,
     OTXTaskType,
 )
-from otx.data.transform_libs.torchvision import Compose, RandomFlip
 
 
 class TestOTXDataModule:
@@ -350,36 +349,6 @@ def test_from_otx_datasets_label_info_mismatch(self, fxt_mock_dataset) -> None:
                 val_dataset=mock_val,
             )
 
-    def test_from_otx_datasets_with_normalization(self, mocker, fxt_mock_subset_configs, fxt_mock_dataset) -> None:
-        """Test from_otx_datasets correctly extracts normalization parameters."""
-        from torchvision.transforms.v2 import Normalize
-
-        # Create mock dataset with Normalize transform
-        normalize_transform = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-
-        shared_label_info = MagicMock()
-        mock_train = fxt_mock_dataset(
-            transforms=[normalize_transform],
-            label_info=shared_label_info,
-        )
-        mock_val = fxt_mock_dataset(label_info=shared_label_info)
-
-        mocker.patch.object(
-            OTXDataModule,
-            "get_default_subset_configs",
-            return_value=fxt_mock_subset_configs,
-        )
-
-        # Create module
-        module = OTXDataModule.from_otx_datasets(
-            train_dataset=mock_train,
-            val_dataset=mock_val,
-        )
-
-        # Assertions - normalization params should be extracted
-        assert module.input_mean == tuple(normalize_transform.mean)
-        assert module.input_std == tuple(normalize_transform.std)
-
     def test_from_otx_datasets_with_auto_num_workers(self, mocker, fxt_mock_subset_configs, fxt_mock_dataset) -> None:
         """Test from_otx_datasets with auto_num_workers enabled."""
         # Create mock datasets
@@ -413,37 +382,20 @@ def test_from_otx_datasets_with_auto_num_workers(self, mocker, fxt_mock_subset_c
         assert module.device == DeviceType.auto  # Default value
 
     @pytest.mark.parametrize(
-        "transforms_source",
+        ("transforms_source", "expected"),
         [
-            None,
-            [
-                {"class_path": "otx.data.transform_libs.torchvision.RandomFlip", "init_args": {"probability": 0.5}},
-                {
-                    "class_path": "otx.data.transform_libs.torchvision.Normalize",
-                    "init_args": {"mean": [123.675, 116.28, 103.53], "std": [58.395, 57.12, 57.375]},
-                },
-            ],
-            [
-                RandomFlip(probability=0.5),
-                Normalize(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]),
-            ],
-            Compose(
-                [
-                    RandomFlip(probability=0.5),
-                    Normalize(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]),
-                ]
+            (None, (None, None)),
+            (
+                [Normalize(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])],
+                ((123.675, 116.28, 103.53), (58.395, 57.12, 57.375)),
             ),
         ],
-        ids=["from None", "from list of configs", "from list of objects", "from compose"],
+        ids=["no normalize", "with normalize"],
     )
-    def test_extract_normalization_params(self, transforms_source) -> None:
-        """Test _extract_normalization_params with various transform sources."""
-        mean, std = OTXDataModule.extract_normalization_params(transforms_source)
-
-        # Assertions based on expected values
-        if transforms_source is None:
-            assert mean == (0.0, 0.0, 0.0)
-            assert std == (1.0, 1.0, 1.0)
-        else:
-            assert mean == (123.675, 116.28, 103.53)
-            assert std == (58.395, 57.12, 57.375)
+    def test_extract_normalization_params(self, transforms_source, expected) -> None:
+        """Test that CPUAugmentationPipeline exposes the mean/std expected for the given transforms."""
+        from otx.data.augmentation.pipeline import CPUAugmentationPipeline
+
+        pipeline = CPUAugmentationPipeline(augmentations=transforms_source)
+        result = (pipeline.mean, pipeline.std)
+        assert result == expected
diff --git a/library/tests/unit/data/test_transform_libs.py b/library/tests/unit/data/test_transform_libs.py
deleted file mode 100644
index a720b0229e8..00000000000
--- a/library/tests/unit/data/test_transform_libs.py
+++ /dev/null
@@ -1,141 +0,0 @@
-# Copyright (C) 2023 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-#
-from __future__ import annotations
-
-from copy import deepcopy
-from typing import Any
-
-import pytest
-import torch
-from lightning.pytorch.cli import instantiate_class
-from omegaconf import OmegaConf
-from torchvision.transforms import v2
-
-from otx.config.data import SubsetConfig
-from otx.data.transform_libs.torchvision import (
-    TorchVisionTransformLib,
-)
-
-
-class TestTorchVisionTransformLib:
-    @pytest.fixture(params=["from_dict", "from_obj", "from_compose"])
-    def fxt_config(self, request) -> list[dict[str, Any]]:
-        if request.param == "from_compose":
-            return v2.Compose(
-                [
-                    v2.RandomResizedCrop(size=(224, 224), antialias=True),
-                    v2.RandomHorizontalFlip(p=0.5),
-                    v2.ToDtype(torch.float32),
-                    v2.Normalize(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]),
-                ],
-            )
-        prefix = "torchvision.transforms.v2"
-        cfg = f"""
-        transforms:
-          - class_path: {prefix}.RandomResizedCrop
-            init_args:
-                size: [224, 224]
-                antialias: True
-          - class_path: {prefix}.RandomHorizontalFlip
-            init_args:
-                p: 0.5
-          - class_path: {prefix}.ToDtype
-            init_args:
-                dtype: ${{as_torch_dtype:torch.float32}}
-          - class_path: {prefix}.Normalize
-            init_args:
-                mean: [123.675, 116.28, 103.53]
-                std: [58.395, 57.12, 57.375]
-        """
-        created = OmegaConf.create(cfg)
-        if request.param == "from_obj":
-            return SubsetConfig(
-                batch_size=1,
-                transforms=[instantiate_class(args=(), init=transform) for transform in created.transforms],
-            )
-        return created
-
-    def test_transform_enable_flag(self) -> None:
-        prefix = "torchvision.transforms.v2"
-        cfg_str = f"""
-        transforms:
-          - class_path: {prefix}.RandomResizedCrop
-            init_args:
-                size: [224, 224]
-                antialias: True
-          - class_path: {prefix}.RandomHorizontalFlip
-            init_args:
-                p: 0.5
-          - class_path: {prefix}.ToDtype
-            init_args:
-                dtype: ${{as_torch_dtype:torch.float32}}
-          - class_path: {prefix}.Normalize
-            init_args:
-                mean: [123.675, 116.28, 103.53]
-                std: [58.395, 57.12, 57.375]
-        """
-        cfg_org = OmegaConf.create(cfg_str)
-
-        cfg = deepcopy(cfg_org)
-        cfg.transforms[0].enable = False  # Remove 1st
-        transform = TorchVisionTransformLib.generate(cfg)
-        assert len(transform.transforms) == 3
-        assert "RandomResizedCrop" not in repr(transform)
-
-        cfg = deepcopy(cfg_org)
-        cfg.transforms[1].enable = False  # Remove 2nd
-        transform = TorchVisionTransformLib.generate(cfg)
-        assert len(transform.transforms) == 3
-        assert "RandomHorizontalFlip" not in repr(transform)
-
-        cfg = deepcopy(cfg_org)
-        cfg.transforms[2].enable = True  # No effect
-        transform = TorchVisionTransformLib.generate(cfg)
-        assert len(transform.transforms) == 4
-        assert "ToDtype" in repr(transform)
-
-    @pytest.fixture
-    def fxt_config_w_input_size(self) -> list[dict[str, Any]]:
-        cfg = """
-        input_size:
-        - 300
-        - 200
-        transforms:
-          - class_path: otx.data.transform_libs.torchvision.RandomResize
-            init_args:
-                scale: $(input_size) * 0.5
-          - class_path: otx.data.transform_libs.torchvision.RandomCrop
-            init_args:
-                crop_size: $(input_size)
-          - class_path: otx.data.transform_libs.torchvision.RandomResize
-            init_args:
-                scale: $(input_size) * 1.1
-        """
-        return OmegaConf.create(cfg)
-
-    def test_configure_input_size(self, fxt_config_w_input_size):
-        transform = TorchVisionTransformLib.generate(fxt_config_w_input_size)
-        assert isinstance(transform, v2.Compose)
-        assert transform.transforms[0].scale == (150, 100)  # RandomResize gets sequence of integer
-        assert transform.transforms[1].crop_size == (300, 200)  # RandomCrop gets sequence of integer
-        assert transform.transforms[2].scale == (round(300 * 1.1), round(200 * 1.1))  # check round
-
-    def test_configure_input_size_none(self, fxt_config_w_input_size):
-        """Check input size is None but transform has $(ipnput_size)."""
-        fxt_config_w_input_size.input_size = None
-        with pytest.raises(RuntimeError, match="input_size is set to None"):
-            TorchVisionTransformLib.generate(fxt_config_w_input_size)
-
-    def test_eval_input_size_str(self):
-        assert TorchVisionTransformLib._eval_input_size_str("2") == 2
-        assert TorchVisionTransformLib._eval_input_size_str("(2, 3)") == (2, 3)
-        assert TorchVisionTransformLib._eval_input_size_str("2*3") == 6
-        assert TorchVisionTransformLib._eval_input_size_str("(2, 3) *3") == (6, 9)
-        assert TorchVisionTransformLib._eval_input_size_str("(5, 5) / 2") == (2, 2)
-        assert TorchVisionTransformLib._eval_input_size_str("(10, 11) * -0.5") == (-5, -6)
-
-    @pytest.mark.parametrize("input_str", ["1+1", "1+-5", "rm fake", "hoho"])
-    def test_eval_input_size_str_wrong_value(self, input_str):
-        with pytest.raises(SyntaxError):
-            assert TorchVisionTransformLib._eval_input_size_str(input_str)
diff --git a/library/tests/unit/data/transform_libs/__init__.py b/library/tests/unit/data/transform_libs/__init__.py
deleted file mode 100644
index 2e36e6836df..00000000000
--- a/library/tests/unit/data/transform_libs/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# Copyright (C) 2023 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-"""Unit tests of data transforms."""
diff --git a/library/tests/unit/data/transform_libs/test_torchvision.py b/library/tests/unit/data/transform_libs/test_torchvision.py
deleted file mode 100644
index 0f7a03d9d7e..00000000000
--- a/library/tests/unit/data/transform_libs/test_torchvision.py
+++ /dev/null
@@ -1,1215 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-# Copyright (c) OpenMMLab. All rights reserved.
-"""Unit tests of detection data transform."""
-
-from __future__ import annotations
-
-from copy import deepcopy
-
-import numpy as np
-import pytest
-import torch
-from datumaro.experimental.fields import ImageInfo as DmImageInfo
-from torch import LongTensor
-from torchvision import tv_tensors
-from torchvision.transforms import v2 as tvt_v2
-from torchvision.transforms.v2 import ToDtype
-from torchvision.transforms.v2 import functional as F  # noqa: N812
-
-from otx.data.entity.sample import (
-    ClassificationSample,
-    DetectionSample,
-    InstanceSegmentationSample,
-    KeypointSample,
-    OTXSample,
-    OTXSampleBatch,
-    SegmentationSample,
-)
-from otx.data.transform_libs.torchvision import (
-    CachedMixUp,
-    CachedMosaic,
-    Compose,
-    MinIoURandomCrop,
-    Pad,
-    PhotoMetricDistortion,
-    RandomAffine,
-    RandomCrop,
-    RandomFlip,
-    RandomGaussianNoise,
-    RandomResize,
-    Resize,
-    TopdownAffine,
-    YOLOXHSVRandomAug,
-)
-from otx.data.transform_libs.utils import overlap_bboxes
-
-RNG = np.random.default_rng(42)
-
-
-class MockFrame:
-    data = np.ndarray([10, 10, 3], dtype=np.uint8)
-
-
-class MockVideo:
-    data = [MockFrame()] * 10
-
-    def __getitem__(self, idx):
-        return self.data[idx]
-
-    def close(self):
-        return
-
-
-@pytest.fixture
-def seg_data_entity() -> SegmentationSample:
-    from datumaro.experimental.fields import ImageInfo as DmImageInfo
-
-    masks = torch.randint(low=0, high=2, size=(1, 112, 224), dtype=torch.uint8)
-    return SegmentationSample(
-        image=tv_tensors.Image(torch.randint(low=0, high=256, size=(3, 112, 224), dtype=torch.uint8)),
-        dm_image_info=DmImageInfo(height=112, width=224),
-        masks=tv_tensors.Mask(masks),
-    )
-
-
-@pytest.fixture
-def det_data_entity() -> DetectionSample:
-    from datumaro.experimental.fields import ImageInfo as DmImageInfo
-
-    return DetectionSample(
-        image=tv_tensors.Image(torch.randint(low=0, high=256, size=(3, 112, 224), dtype=torch.uint8)),
-        dm_image_info=DmImageInfo(height=112, width=224),
-        bboxes=np.array([[0, 0, 50, 50]], dtype=np.float32),
-        label=LongTensor([1]),
-    )
-
-
-@pytest.fixture
-def det_data_entity_with_masks() -> InstanceSegmentationSample:
-    """Create a data entity with masks for testing."""
-    from datumaro.experimental.fields import ImageInfo as DmImageInfo
-
-    img_size = (112, 224)
-    fake_image = torch.randint(low=0, high=256, size=(3, *img_size), dtype=torch.uint8)
-    fake_bboxes = np.array([[10, 10, 50, 50], [60, 60, 100, 100]], dtype=np.float32)
-    fake_labels = LongTensor([1, 2])
-
-    # Create meaningful masks that correspond to the bounding boxes
-    masks = torch.zeros(size=(2, *img_size), dtype=torch.uint8)
-    masks[0, 10:50, 10:50] = 1  # First mask
-    masks[1, 60:100, 60:100] = 1  # Second mask
-    fake_masks = tv_tensors.Mask(masks)
-
-    return InstanceSegmentationSample(
-        image=tv_tensors.Image(fake_image),
-        dm_image_info=DmImageInfo(height=img_size[0], width=img_size[1]),
-        bboxes=fake_bboxes,
-        label=fake_labels,
-        masks=fake_masks,
-    )
-
-
-@pytest.fixture
-def det_data_entity_empty_masks() -> InstanceSegmentationSample:
-    """Create a data entity with empty masks for testing."""
-    from datumaro.experimental.fields import ImageInfo as DmImageInfo
-
-    img_size = (112, 224)
-    fake_image = torch.randint(low=0, high=256, size=(3, *img_size), dtype=torch.uint8)
-    fake_bboxes = np.array([[10, 10, 50, 50]], dtype=np.float32)
-    fake_labels = LongTensor([1])
-
-    # Create empty masks
-    fake_masks = tv_tensors.Mask(torch.zeros(size=(0, *img_size), dtype=torch.uint8))
-
-    return InstanceSegmentationSample(
-        image=tv_tensors.Image(fake_image),
-        dm_image_info=DmImageInfo(height=img_size[0], width=img_size[1]),
-        bboxes=fake_bboxes,
-        label=fake_labels,
-        masks=fake_masks,
-    )
-
-
-class TestMinIoURandomCrop:
-    @pytest.fixture
-    def min_iou_random_crop(self) -> MinIoURandomCrop:
-        return MinIoURandomCrop(is_numpy_to_tvtensor=False)
-
-    def test_forward(self, min_iou_random_crop: MinIoURandomCrop, det_data_entity: DetectionSample) -> None:
-        """Test forward."""
-        results = min_iou_random_crop(deepcopy(det_data_entity))
-
-        if (mode := min_iou_random_crop.mode) == 1:
-            assert torch.equal(results.bboxes, det_data_entity.bboxes)
-        else:
-            patch = tv_tensors.wrap(torch.tensor([[0, 0, *results.img_info.img_shape]]), like=results.bboxes)
-            ious = overlap_bboxes(patch, results.bboxes)
-            assert torch.all(ious >= mode)
-            assert results.image.shape[:2] == results.img_info.img_shape
-            assert results.img_info.scale_factor is None
-
-
-class TestResize:
-    @pytest.fixture
-    def resize(self) -> Resize:
-        return Resize(scale=(128, 96), is_numpy_to_tvtensor=False)  # (64, 64) -> (128, 96)
-
-    @pytest.mark.parametrize(
-        ("keep_ratio", "expected_shape", "expected_scale_factor"),
-        [
-            (True, (96, 96), (1.5, 1.5)),
-            (False, (128, 96), (2.0, 1.5)),
-        ],
-    )
-    def test_forward_only_image(
-        self,
-        resize: Resize,
-        fxt_det_data_entity: tuple[tuple, OTXSample, OTXSampleBatch],
-        keep_ratio: bool,
-        expected_shape: tuple,
-        expected_scale_factor: tuple,
-    ) -> None:
-        """Test forward only image."""
-        resize.keep_ratio = keep_ratio
-        resize.transform_bbox = False
-        resize.transform_mask = False
-        entity = deepcopy(fxt_det_data_entity[0])
-
-        results = resize(entity)
-
-        assert results.img_info.ori_shape == (64, 64)
-        if keep_ratio:
-            assert results.image.shape[:2] == expected_shape
-            assert results.img_info.img_shape == expected_shape
-            assert results.img_info.scale_factor == expected_scale_factor
-        else:
-            assert results.image.shape[:2] == expected_shape
-            assert results.img_info.img_shape == expected_shape
-            assert results.img_info.scale_factor == expected_scale_factor
-
-        assert torch.all(results.bboxes.data == fxt_det_data_entity[0].bboxes.data)
-
-    @pytest.mark.parametrize(
-        ("keep_ratio", "expected_shape"),
-        [
-            (True, (96, 96)),
-            (False, (128, 96)),
-        ],
-    )
-    def test_forward_bboxes_masks(
-        self,
-        resize: Resize,
-        fxt_inst_seg_data_entity: tuple[tuple, OTXSample, OTXSampleBatch],
-        keep_ratio: bool,
-        expected_shape: tuple,
-    ) -> None:
-        """Test forward with bboxes and masks."""
-        resize.transform_bbox = True
-        resize.transform_mask = True
-        entity = deepcopy(fxt_inst_seg_data_entity[0])
-
-        resize.keep_ratio = keep_ratio
-        results = resize(entity)
-
-        assert results.image.shape[:2] == expected_shape
-        assert results.img_info.img_shape == expected_shape
-        assert torch.all(
-            results.bboxes
-            == fxt_inst_seg_data_entity[0].bboxes * torch.tensor(results.img_info.scale_factor[::-1] * 2),
-        )
-        assert results.masks.shape[1:] == expected_shape
-
-
-class TestRandomFlip:
-    @pytest.fixture
-    def random_flip(self) -> RandomFlip:
-        return RandomFlip(probability=1.0, is_numpy_to_tvtensor=False)
-
-    def test_forward(
-        self,
-        random_flip: RandomFlip,
-        fxt_inst_seg_data_entity: tuple[tuple, OTXSample, OTXSampleBatch],
-    ) -> None:
-        """Test forward."""
-        entity = deepcopy(fxt_inst_seg_data_entity[0])
-
-        results = random_flip.forward(entity)
-
-        # test image
-        assert torch.all(F.to_image(results.image.copy()) == fxt_inst_seg_data_entity[0].image)
-
-        # test bboxes
-        bboxes_results = results.bboxes.clone()
-        bboxes_results[..., 0] = results.img_info.img_shape[1] - results.bboxes[..., 2]
-        bboxes_results[..., 2] = results.img_info.img_shape[1] - results.bboxes[..., 0]
-        assert torch.all(bboxes_results == fxt_inst_seg_data_entity[0].bboxes)
-
-        # test masks
-        assert torch.all(tv_tensors.Mask(results.masks).flip(-1) == fxt_inst_seg_data_entity[0].masks)
-
-
-class TestPhotoMetricDistortion:
-    @pytest.fixture
-    def photo_metric_distortion(self) -> PhotoMetricDistortion:
-        return PhotoMetricDistortion(is_numpy_to_tvtensor=False)
-
-    def test_forward(self, photo_metric_distortion: PhotoMetricDistortion, det_data_entity: DetectionSample) -> None:
-        """Test forward."""
-        results = photo_metric_distortion(deepcopy(det_data_entity))
-
-        assert results.image.dtype == np.float32
-
-
-class TestRandomAffine:
-    @pytest.fixture
-    def random_affine(self) -> RandomAffine:
-        return RandomAffine(is_numpy_to_tvtensor=False)
-
-    @pytest.fixture
-    def random_affine_with_mask_transform(self) -> RandomAffine:
-        return RandomAffine(transform_mask=True, mask_fill_value=0, is_numpy_to_tvtensor=False)
-
-    @pytest.fixture
-    def random_affine_without_mask_transform(self) -> RandomAffine:
-        return RandomAffine(transform_mask=False, is_numpy_to_tvtensor=False)
-
-    def test_init_invalid_translate_ratio(self) -> None:
-        with pytest.raises(ValueError):  # noqa: PT011
-            RandomAffine(max_translate_ratio=1.5)
-
-    def test_init_invalid_scaling_ratio_range_inverse_order(self) -> None:
-        with pytest.raises(ValueError):  # noqa: PT011
-            RandomAffine(scaling_ratio_range=(1.5, 0.5))
-
-    def test_init_invalid_scaling_ratio_range_zero_value(self) -> None:
-        with pytest.raises(ValueError):  # noqa: PT011
-            RandomAffine(scaling_ratio_range=(0, 0.5))
-
-    def test_forward(self, random_affine: RandomAffine, det_data_entity: DetectionSample) -> None:
-        """Test forward."""
-        results = random_affine(deepcopy(det_data_entity))
-
-        assert results.image.shape[:2] == (112, 224)
-        assert results.label.shape[0] == results.bboxes.shape[0]
-        assert results.label.dtype == torch.long
-        assert results.bboxes.dtype == torch.float32
-        assert results.img_info.img_shape == results.image.shape[:2]
-
-    def test_segmentation_transform(
-        self, random_affine_with_mask_transform: RandomAffine, seg_data_entity: SegmentationSample
-    ) -> None:
-        """Test forward for segmentation task."""
-        original_entity = deepcopy(seg_data_entity)
-        results = random_affine_with_mask_transform(original_entity)
-
-        assert hasattr(results, "masks")
-        assert results.masks is not None
-        assert results.masks.shape[0] > 0  # Should have masks
-        assert results.masks.shape[1:] == results.image.shape[:2]  # Same spatial dimensions as image
-        assert isinstance(results.masks, tv_tensors.Mask)
-
-    def test_forward_with_masks_transform_enabled(
-        self,
-        random_affine_with_mask_transform: RandomAffine,
-        det_data_entity_with_masks: InstanceSegmentationSample,
-    ) -> None:
-        """Test forward with masks when transform_mask is True."""
-        original_entity = deepcopy(det_data_entity_with_masks)
-        results = random_affine_with_mask_transform(original_entity)
-
-        # Check that masks are present and transformed
-        assert hasattr(results, "masks")
-        assert results.masks is not None
-        assert results.masks.shape[0] > 0  # Should have masks
-        assert results.masks.shape[1:] == results.image.shape[:2]  # Same spatial dimensions as image
-
-        # Check that the number of masks matches the number of remaining bboxes and labels
-        assert results.masks.shape[0] == results.bboxes.shape[0]
-        assert results.masks.shape[0] == results.label.shape[0]
-
-        # Check that masks are still binary (0 or 255)
-        unique_values = torch.unique(results.masks)
-        assert len(unique_values) <= 2  # Should only have 0 and/or 255
-
-        # Check data types
-        assert results.masks.dtype == torch.bool
-        assert isinstance(results.masks, tv_tensors.Mask)
-
-    def test_forward_with_masks_transform_disabled(
-        self,
-        random_affine_without_mask_transform: RandomAffine,
-        det_data_entity_with_masks: InstanceSegmentationSample,
-    ) -> None:
-        """Test forward with masks when transform_mask is False."""
-        original_entity = deepcopy(det_data_entity_with_masks)
-        results = random_affine_without_mask_transform(original_entity)
-
-        # Check that masks are present but not transformed
-        assert hasattr(results, "masks")
-        assert results.masks is not None
-
-        # Since transform_mask is False, masks should remain unchanged
-        # However, they might still be filtered based on valid bounding boxes
-        assert results.masks.shape[0] == results.bboxes.shape[0], (
-            f"results.masks.shape[0] = {results.masks.shape[0]}, results.bboxes.shape[0] = {results.bboxes.shape[0]}"
-        )
-        assert results.masks.shape[0] == results.label.shape[0], (
-            f"results.masks.shape[0] = {results.masks.shape[0]}, results.label.shape[0] = {results.label.shape[0]}"
-        )
-
-    def test_forward_with_empty_masks(
-        self,
-        random_affine_with_mask_transform: RandomAffine,
-        det_data_entity_empty_masks: InstanceSegmentationSample,
-    ) -> None:
-        """Test forward with empty masks."""
-        original_entity = deepcopy(det_data_entity_empty_masks)
-        results = random_affine_with_mask_transform(original_entity)
-
-        # Check that empty masks are handled correctly
-        assert hasattr(results, "masks")
-        assert results.masks is not None
-        assert results.masks.shape[0] == 0  # Should still be empty
-        assert results.masks.shape[1:] == results.image.shape[:2]  # Same spatial dimensions
-
-    def test_mask_fill_value_applied(
-        self,
-        det_data_entity_with_masks: InstanceSegmentationSample,
-        repeat: int = 10,
-    ) -> None:
-        """Test that mask_fill_value is applied correctly."""
-        # Test with different fill values
-        fill_values = [0, 128, 255]
-
-        for _ in range(repeat):
-            for fill_value in fill_values:
-                transform = RandomAffine(
-                    transform_mask=True,
-                    mask_fill_value=fill_value,
-                    max_rotate_degree=45,  # Force significant transformation
-                    max_translate_ratio=0.2,
-                    scaling_ratio_range=(0.8, 1.2),
-                    max_shear_degree=10,
-                )
-
-                original_entity = deepcopy(det_data_entity_with_masks)
-                results = transform(original_entity)
-
-                assert hasattr(results, "masks")
-                assert results.masks is not None
-                # The fill value should be used for areas outside the original mask
-                # This is hard to test directly, but we can check that the transform executed successfully
-                assert results.masks.shape[0] > 0
-
-    def test_mask_consistency_with_image_transform(
-        self,
-        det_data_entity_with_masks: InstanceSegmentationSample,
-    ) -> None:
-        """Test that masks and images are transformed consistently."""
-        # Create a transform with fixed parameters for reproducibility
-        transform = RandomAffine(
-            transform_mask=True,
-            mask_fill_value=0,
-            max_rotate_degree=0,  # No rotation for simpler testing
-            max_translate_ratio=0.1,  # Small translation
-            scaling_ratio_range=(1.0, 1.0),  # No scaling
-            max_shear_degree=0,  # No shear
-            is_numpy_to_tvtensor=False,
-        )
-
-        original_entity = deepcopy(det_data_entity_with_masks)
-        results = transform(original_entity)
-
-        # Check that image and masks have consistent dimensions
-        assert results.image.shape[:2] == results.masks.shape[1:]
-
-        # Check that masks are still properly shaped
-        assert len(results.masks.shape) == 3  # (N, H, W)
-        assert results.masks.shape[0] == results.bboxes.shape[0]
-
-    def test_mask_bbox_filtering_consistency(
-        self,
-        det_data_entity_with_masks: InstanceSegmentationSample,
-    ) -> None:
-        """Test that masks are filtered consistently with bboxes."""
-        # Create a transform that might filter out some bboxes
-        transform = RandomAffine(
-            transform_mask=True,
-            mask_fill_value=0,
-            bbox_clip_border=True,
-            max_rotate_degree=30,
-            max_translate_ratio=0.3,
-            scaling_ratio_range=(0.5, 1.5),
-            max_shear_degree=10,
-        )
-
-        original_entity = deepcopy(det_data_entity_with_masks)
-        original_num_objects = original_entity.masks.shape[0]
-
-        results = transform(original_entity)
-
-        # Check that the number of masks matches the number of valid bboxes and labels
-        assert results.masks.shape[0] == results.bboxes.shape[0]
-        assert results.masks.shape[0] == results.label.shape[0]
-
-        # The number of objects might be reduced due to filtering
-        assert results.masks.shape[0] <= original_num_objects
-
-
-class TestCachedMosaic:
-    @pytest.fixture
-    def cached_mosaic(self) -> CachedMosaic:
-        return CachedMosaic(img_scale=(128, 128), random_pop=False, max_cached_images=20, is_numpy_to_tvtensor=False)
-
-    def test_init_invalid_img_scale(self) -> None:
-        with pytest.raises(AssertionError):
-            CachedMosaic(img_scale=640, is_numpy_to_tvtensor=False)
-
-    def test_init_invalid_probability(self) -> None:
-        with pytest.raises(AssertionError):
-            CachedMosaic(probability=1.5, is_numpy_to_tvtensor=False)
-
-    def test_forward_pop_small_cache(
-        self,
-        cached_mosaic: CachedMosaic,
-        fxt_inst_seg_data_entity: tuple[tuple, OTXSample, OTXSampleBatch],
-    ) -> None:
-        """Test forward for popping cache."""
-        cached_mosaic.max_cached_images = 4
-        cached_mosaic.results_cache = [fxt_inst_seg_data_entity[0]] * cached_mosaic.max_cached_images
-
-        # 4 -> 5 thru append -> 4 thru pop -> return due to small cache
-        results = cached_mosaic(deepcopy(fxt_inst_seg_data_entity[0]))
-
-        # check pop
-        assert len(cached_mosaic.results_cache) == cached_mosaic.max_cached_images
-
-        # check small cache
-        assert torch.all(results.image == fxt_inst_seg_data_entity[0].image)
-        assert torch.all(results.bboxes == fxt_inst_seg_data_entity[0].bboxes)
-
-    def test_forward(
-        self,
-        cached_mosaic: CachedMosaic,
-        fxt_inst_seg_data_entity: tuple[tuple, OTXSample, OTXSampleBatch],
-    ) -> None:
-        """Test forward."""
-        entity = deepcopy(fxt_inst_seg_data_entity[0])
-        cached_mosaic.results_cache = [entity] * 4
-        cached_mosaic.prob = 1.0
-
-        results = cached_mosaic(deepcopy(entity))
-
-        assert results.image.shape[:2] == (256, 256)
-        assert results.label.shape[0] == results.bboxes.shape[0]
-        assert results.label.dtype == torch.int64
-        assert results.bboxes.dtype == torch.float32
-        assert results.img_info.img_shape == results.image.shape[:2]
-        assert results.masks.shape[1:] == (256, 256)
-
-
-class TestCachedMixUp:
-    @pytest.fixture
-    def cached_mixup(self) -> CachedMixUp:
-        return CachedMixUp(
-            ratio_range=(1.0, 1.0), probability=1.0, random_pop=False, max_cached_images=10, is_numpy_to_tvtensor=False
-        )
-
-    def test_init_invalid_img_scale(self) -> None:
-        with pytest.raises(AssertionError):
-            CachedMixUp(img_scale=640)
-
-    def test_init_invalid_probability(self) -> None:
-        with pytest.raises(AssertionError):
-            CachedMosaic(probability=1.5)
-
-    def test_forward_pop_small_cache(
-        self,
-        cached_mixup: CachedMixUp,
-        fxt_inst_seg_data_entity: tuple[tuple, OTXSample, OTXSampleBatch],
-    ) -> None:
-        """Test forward for popping cache."""
-        cached_mixup.max_cached_images = 1  # force to set to 1 for this test
-        cached_mixup.results_cache = [fxt_inst_seg_data_entity[0]] * cached_mixup.max_cached_images
-
-        # 1 -> 2 thru append -> 1 thru pop -> return due to small cache
-        results = cached_mixup(deepcopy(fxt_inst_seg_data_entity[0]))
-
-        # check pop
-        assert len(cached_mixup.results_cache) == cached_mixup.max_cached_images
-
-        # check small cache
-        assert torch.all(results.image == fxt_inst_seg_data_entity[0].image)
-        assert torch.all(results.bboxes == fxt_inst_seg_data_entity[0].bboxes)
-
-    def test_forward(
-        self,
-        cached_mixup: CachedMixUp,
-        fxt_inst_seg_data_entity: tuple[tuple, OTXSample, OTXSampleBatch],
-    ) -> None:
-        """Test forward."""
-        entity = deepcopy(fxt_inst_seg_data_entity[0])
-        cached_mixup.results_cache = [entity]
-        cached_mixup.prob = 1.0
-        cached_mixup.flip_ratio = 0.0
-
-        results = cached_mixup(deepcopy(entity))
-
-        assert results.image.shape[:2] == (64, 64)
-        assert results.label.shape[0] == results.bboxes.shape[0]
-        assert results.label.dtype == torch.int64
-        assert results.bboxes.dtype == torch.float32
-        assert results.img_info.img_shape == results.image.shape[:2]
-        assert results.masks.shape[1:] == (64, 64)
-
-
-class TestYOLOXHSVRandomAug:
-    @pytest.fixture
-    def yolox_hsv_random_aug(self) -> YOLOXHSVRandomAug:
-        return YOLOXHSVRandomAug(is_numpy_to_tvtensor=False)
-
-    def test_forward(self, yolox_hsv_random_aug: YOLOXHSVRandomAug, det_data_entity: DetectionSample) -> None:
-        """Test forward."""
-        results = yolox_hsv_random_aug(deepcopy(det_data_entity))
-
-        assert results.image.shape[:2] == (112, 224)
-        assert results.label.shape[0] == results.bboxes.shape[0]
-        assert results.label.dtype == torch.int64
-        assert results.bboxes.dtype == torch.float32
-
-
-class TestPad:
-    def test_forward(
-        self,
-        fxt_inst_seg_data_entity: tuple[tuple, OTXSample, OTXSampleBatch],
-    ) -> None:
-        entity = deepcopy(fxt_inst_seg_data_entity[0])
-
-        # test pad img/masks with size
-        transform = Pad(size=(96, 128), transform_mask=True, is_numpy_to_tvtensor=False)
-
-        results = transform(deepcopy(entity))
-
-        assert results.image.shape[:2] == (96, 128)
-        assert results.masks.shape[1:] == (96, 128)
-
-        # test pad img/masks with size_divisor
-        transform = Pad(size_divisor=11, transform_mask=True, is_numpy_to_tvtensor=False)
-
-        results = transform(deepcopy(entity))
-
-        # (64, 64) -> (66, 66)
-        assert results.image.shape[:2] == (66, 66)
-        assert results.masks.shape[1:] == (66, 66)
-
-        # test pad img/masks with pad_to_square
-        _transform = Pad(size=(96, 128), transform_mask=True, is_numpy_to_tvtensor=False)
-        entity = _transform(deepcopy(entity))
-        transform = Pad(pad_to_square=True, transform_mask=True, is_numpy_to_tvtensor=False)
-
-        results = transform(deepcopy(entity))
-
-        assert results.image.shape[:2] == (128, 128)
-        assert results.masks.shape[1:] == (128, 128)
-
-        # test pad img/masks with pad_to_square and size_divisor
-        _transform = Pad(size=(96, 128), transform_mask=True, is_numpy_to_tvtensor=False)
-        entity = _transform(deepcopy(entity))
-        transform = Pad(pad_to_square=True, size_divisor=11, transform_mask=True, is_numpy_to_tvtensor=False)
-
-        results = transform(deepcopy(entity))
-
-        assert results.image.shape[:2] == (132, 132)
-        assert results.masks.shape[1:] == (132, 132)
-
-
-class TestRandomResize:
-    def test_init(self):
-        transform = RandomResize((224, 224), (1.0, 2.0), is_numpy_to_tvtensor=False)
-        assert transform.scale == (224, 224)
-
-    def test_repr(self):
-        transform = RandomResize((224, 224), (1.0, 2.0), is_numpy_to_tvtensor=False)
-        transform_str = str(transform)
-        assert isinstance(transform_str, str)
-
-    def test_forward(self, fxt_inst_seg_data_entity: tuple[tuple, OTXSample, OTXSampleBatch]):
-        entity = deepcopy(fxt_inst_seg_data_entity[0])
-
-        # choose target scale from init when override is True
-        transform = RandomResize((224, 224), (1.0, 2.0), is_numpy_to_tvtensor=False)
-
-        results = transform(deepcopy(entity))
-
-        assert results.img_info.img_shape[0] >= 224
-        assert results.img_info.img_shape[0] <= 448
-        assert results.img_info.img_shape[1] >= 224
-        assert results.img_info.img_shape[1] <= 448
-
-        # keep ratio is True
-        transform = RandomResize(
-            (224, 224),
-            (1.0, 2.0),
-            is_numpy_to_tvtensor=False,
-            keep_ratio=True,
-            transform_bbox=True,
-            transform_mask=True,
-        )
-
-        results = transform(deepcopy(entity))
-        assert results.image.shape[0] >= 224
-        assert results.image.shape[0] <= 448
-        assert results.image.shape[1] >= 224
-        assert results.image.shape[1] <= 448
-        assert results.img_info.img_shape[0] >= 224
-        assert results.img_info.img_shape[0] <= 448
-        assert results.img_info.img_shape[1] >= 224
-        assert results.img_info.img_shape[1] <= 448
-        assert results.img_info.scale_factor[0] == results.img_info.scale_factor[1]
-        assert results.bboxes[0, 2] == entity.bboxes[0, 2] * results.img_info.scale_factor[0]
-        assert results.bboxes[0, 3] == entity.bboxes[0, 3] * results.img_info.scale_factor[1]
-        assert results.masks.shape[1] >= 224
-        assert results.masks.shape[1] <= 448
-        assert results.masks.shape[2] >= 224
-        assert results.masks.shape[2] <= 448
-
-        # keep ratio is False
-        transform = RandomResize(
-            (224, 224),
-            (1.0, 2.0),
-            keep_ratio=False,
-            transform_bbox=True,
-            transform_mask=True,
-            is_numpy_to_tvtensor=False,
-        )
-
-        results = transform(deepcopy(entity))
-
-        # choose target scale from init when override is False and scale is a list of tuples
-        transform = RandomResize(
-            [(448, 224), (224, 112)],
-            keep_ratio=False,
-            transform_bbox=True,
-            transform_mask=True,
-            is_numpy_to_tvtensor=False,
-        )
-
-        results = transform(deepcopy(entity))
-
-        assert results.img_info.img_shape[1] >= 112
-        assert results.img_info.img_shape[1] <= 224
-        assert results.img_info.img_shape[0] >= 224
-        assert results.img_info.img_shape[0] <= 448
-
-        # the type of scale is invalid in init
-        with pytest.raises(NotImplementedError):
-            RandomResize([(448, 224), [224, 112]], keep_ratio=True)(deepcopy(entity))
-
-
-class TestRandomCrop:
-    @pytest.fixture
-    def entity(self) -> ClassificationSample:
-        from datumaro.experimental.fields import ImageInfo as DmImageInfo
-
-        return ClassificationSample(
-            image=tv_tensors.Image(torch.randn((3, 24, 32), dtype=torch.float32)),
-            dm_image_info=DmImageInfo(height=24, width=32),
-            label=torch.LongTensor([0]),
-        )
-
-    @pytest.fixture
-    def det_entity(self) -> DetectionSample:
-        from datumaro.experimental.fields import ImageInfo as DmImageInfo
-
-        return DetectionSample(
-            image=tv_tensors.Image(torch.randn((3, 10, 10), dtype=torch.float32)),
-            dm_image_info=DmImageInfo(height=10, width=10),
-            bboxes=np.array([[0, 0, 7, 7], [2, 3, 9, 9]], dtype=np.float32),
-            label=torch.LongTensor([0, 1]),
-        )
-
-    @pytest.fixture
-    def iseg_entity(self) -> InstanceSegmentationSample:
-        from datumaro.experimental.fields import ImageInfo as DmImageInfo
-
-        masks = tv_tensors.Mask(np.zeros((2, 10, 10), np.uint8))
-        return InstanceSegmentationSample(
-            image=tv_tensors.Image(torch.randn((3, 10, 10), dtype=torch.float32)),
-            dm_image_info=DmImageInfo(height=10, width=10),
-            bboxes=np.array([[0, 0, 7, 7], [2, 3, 9, 9]], dtype=np.float32),
-            label=torch.LongTensor([0, 1]),
-            masks=masks,
-        )
-
-    def test_init_invalid_crop_type(self) -> None:
-        # test invalid crop_type
-        with pytest.raises(ValueError, match="Invalid crop_type"):
-            RandomCrop(crop_size=(10, 10), crop_type="unknown", is_numpy_to_tvtensor=False)
-
-    @pytest.mark.parametrize("crop_type", ["absolute", "absolute_range"])
-    @pytest.mark.parametrize("crop_size", [(0, 0), (0, 1), (1, 0)])
-    def test_init_invalid_value(self, crop_type: str, crop_size: tuple[int, int]) -> None:
-        # test h > 0 and w > 0
-        with pytest.raises(AssertionError):
-            RandomCrop(crop_size=crop_size, crop_type=crop_type, is_numpy_to_tvtensor=False)
-
-    @pytest.mark.parametrize("crop_type", ["absolute", "absolute_range"])
-    @pytest.mark.parametrize("crop_size", [(1.0, 1), (1, 1.0), (1.0, 1.0)])
-    def test_init_invalid_type(self, crop_type: str, crop_size: tuple[int, int]) -> None:
-        # test type(h) = int and type(w) = int
-        with pytest.raises(AssertionError):
-            RandomCrop(crop_size=crop_size, crop_type=crop_type, is_numpy_to_tvtensor=False)
-
-    def test_init_invalid_size(self) -> None:
-        # test crop_size[0] <= crop_size[1]
-        with pytest.raises(AssertionError):
-            RandomCrop(crop_size=(10, 5), crop_type="absolute_range", is_numpy_to_tvtensor=False)
-
-    @pytest.mark.parametrize("crop_type", ["relative_range", "relative"])
-    @pytest.mark.parametrize("crop_size", [(0, 1), (1, 0), (1.1, 0.5), (0.5, 1.1)])
-    def test_init_invalid_range(self, crop_type: str, crop_size: tuple[int | float]) -> None:
-        # test h in (0, 1] and w in (0, 1]
-        with pytest.raises(AssertionError):
-            RandomCrop(crop_size=crop_size, crop_type=crop_type, is_numpy_to_tvtensor=False)
-
-    @pytest.mark.parametrize(("crop_type", "crop_size"), [("relative", (0.5, 0.5)), ("absolute", (12, 16))])
-    def test_forward_relative_absolute(self, entity, crop_type: str, crop_size: tuple[float | int]) -> None:
-        # test relative and absolute crop
-        transform = RandomCrop(crop_size=crop_size, crop_type=crop_type, is_numpy_to_tvtensor=False)
-        target_shape = (12, 16)
-
-        results = transform(deepcopy(entity))
-
-        assert results.image.shape[:2] == target_shape
-
-    def test_forward_absolute_range(self, entity) -> None:
-        # test absolute_range crop
-        transform = RandomCrop(crop_size=(10, 20), crop_type="absolute_range", is_numpy_to_tvtensor=False)
-
-        results = transform(deepcopy(entity))
-
-        h, w = results.image.shape[:2]
-        assert 10 <= w <= 20
-        assert 10 <= h <= 20
-        assert results.img_info.img_shape == results.image.shape[:2]
-
-    def test_forward_relative_range(self, entity) -> None:
-        # test relative_range crop
-        transform = RandomCrop(crop_size=(0.9, 0.8), crop_type="relative_range", is_numpy_to_tvtensor=False)
-
-        results = transform(deepcopy(entity))
-
-        h, w = results.image.shape[:2]
-        assert 24 * 0.9 <= h <= 24
-        assert 32 * 0.8 <= w <= 32
-        assert results.img_info.img_shape == results.image.shape[:2]
-
-    def test_forward_bboxes_labels_masks(self, iseg_entity) -> None:
-        # test with bboxes, labels, and masks
-        transform = RandomCrop(
-            crop_size=(7, 5),
-            allow_negative_crop=False,
-            recompute_bbox=False,
-            bbox_clip_border=True,
-            is_numpy_to_tvtensor=False,
-        )
-
-        results = transform(deepcopy(iseg_entity))
-
-        assert results.image.shape[:2] == (7, 5)
-        assert results.bboxes.shape[0] == 2
-        assert results.label.shape[0] == 2
-        assert results.masks.shape[0] == 2
-        assert results.masks.shape[1:] == (7, 5)
-        assert results.img_info.img_shape == results.image.shape[:2]
-
-    def test_forward_recompute_bbox_from_mask(self, iseg_entity) -> None:
-        # test recompute_bbox = True
-        iseg_entity.bboxes = tv_tensors.wrap(torch.tensor([[0.1, 0.1, 0.2, 0.2]]), like=iseg_entity.bboxes)
-        iseg_entity.label = torch.LongTensor([0])
-        target_gt_bboxes = np.zeros((1, 4), dtype=np.float32)
-        transform = RandomCrop(
-            crop_size=(10, 11),
-            allow_negative_crop=False,
-            recompute_bbox=True,
-            bbox_clip_border=True,
-            is_numpy_to_tvtensor=False,
-        )
-        results = transform(deepcopy(iseg_entity))
-
-        assert np.all(results.bboxes.numpy() == target_gt_bboxes)
-
-    def test_forward_bbox_clip_border_false(self, det_entity) -> None:
-        # test bbox_clip_border = False
-        det_entity.bboxes = tv_tensors.wrap(torch.tensor([[0.1, 0.1, 0.2, 0.2]]), like=det_entity.bboxes)
-        det_entity.label = torch.LongTensor([0])
-        transform = RandomCrop(
-            crop_size=(10, 11),
-            allow_negative_crop=False,
-            recompute_bbox=True,
-            bbox_clip_border=False,
-            is_numpy_to_tvtensor=False,
-        )
-
-        results = transform(deepcopy(det_entity))
-
-        assert torch.all(results.bboxes == det_entity.bboxes)
-
-    @pytest.mark.parametrize("allow_negative_crop", [True, False])
-    def test_forward_allow_negative_crop(self, det_entity, allow_negative_crop: bool) -> None:
-        # test the crop does not contain any gt-bbox allow_negative_crop = False
-        det_entity.image = RNG.integers(0, 255, size=(10, 10), dtype=np.uint8)
-        det_entity.bboxes = tv_tensors.wrap(torch.zeros((0, 4)), like=det_entity.bboxes)
-        det_entity.label = torch.LongTensor()
-        transform = RandomCrop(crop_size=(5, 3), allow_negative_crop=allow_negative_crop, is_numpy_to_tvtensor=False)
-
-        results = transform(deepcopy(det_entity))
-
-        if allow_negative_crop:
-            assert results.image.shape == transform.crop_size
-            assert len(results.bboxes) == len(det_entity.bboxes) == 0
-        else:
-            assert results is None
-
-    def test_repr(self):
-        crop_type = "absolute"
-        crop_size = (10, 5)
-        allow_negative_crop = False
-        recompute_bbox = True
-        bbox_clip_border = False
-        transform = RandomCrop(
-            crop_size=crop_size,
-            crop_type=crop_type,
-            allow_negative_crop=allow_negative_crop,
-            recompute_bbox=recompute_bbox,
-            bbox_clip_border=bbox_clip_border,
-            is_numpy_to_tvtensor=False,
-        )
-        assert (
-            repr(transform) == f"RandomCrop(crop_size={crop_size}, crop_type={crop_type}, "
-            f"allow_negative_crop={allow_negative_crop}, "
-            f"recompute_bbox={recompute_bbox}, "
-            f"bbox_clip_border={bbox_clip_border}, "
-            f"is_numpy_to_tvtensor=False)"
-        )
-
-
-class TestTopdownAffine:
-    @pytest.fixture
-    def keypoint_det_entity(self) -> KeypointSample:
-        from datumaro.experimental.fields import ImageInfo as DmImageInfo
-
-        keypoints_data = torch.tensor([[0, 4, 1], [4, 2, 1], [2, 6, 1], [6, 0, 0]], dtype=torch.float32)
-        return KeypointSample(
-            image=tv_tensors.Image(torch.randint(0, 255, size=(3, 10, 10), dtype=torch.uint8)),
-            dm_image_info=DmImageInfo(height=10, width=10),
-            keypoints=keypoints_data,
-            label=torch.LongTensor([0]),
-        )
-
-    def test_forward(self, keypoint_det_entity) -> None:
-        transform = Compose(
-            [
-                TopdownAffine(input_size=(5, 5)),
-            ],
-        )
-        results = transform(deepcopy(keypoint_det_entity))
-        assert results.keypoints.shape == (4, 3)
-
-
-class TestCompose:
-    """Test Compose class with native torchvision transforms."""
-
-    @pytest.fixture
-    def basic_entity(self) -> DetectionSample:
-        """Create a basic data entity for testing."""
-        img_size = (64, 128)
-        return DetectionSample(
-            image=tv_tensors.Image(torch.randint(low=0, high=256, size=(3, *img_size), dtype=torch.uint8)),
-            dm_image_info=DmImageInfo(height=img_size[0], width=img_size[1]),
-            bboxes=np.array([[10, 10, 50, 50]], dtype=np.float32),
-            label=LongTensor([1]),
-        )
-
-    @pytest.fixture
-    def entity_with_masks(self) -> InstanceSegmentationSample:
-        """Create entity with masks."""
-        img_size = (64, 128)
-        masks = torch.zeros(size=(1, *img_size), dtype=torch.uint8)
-        masks[0, 10:50, 10:50] = 1
-        return InstanceSegmentationSample(
-            image=tv_tensors.Image(torch.randint(low=0, high=256, size=(3, *img_size), dtype=torch.uint8)),
-            dm_image_info=DmImageInfo(height=img_size[0], width=img_size[1]),
-            bboxes=np.array([[10, 10, 50, 50]], dtype=np.float32),
-            label=LongTensor([1]),
-            masks=tv_tensors.Mask(masks),
-        )
-
-    def test_compose_with_single_native_transform(self, basic_entity: DetectionSample) -> None:
-        """Test Compose with a single native torchvision transform."""
-
-        transform = Compose(
-            [
-                tvt_v2.Resize(size=(128, 256)),
-            ]
-        )
-
-        result = transform(basic_entity)
-
-        assert result is not None
-        assert result.image.shape[1:] == (128, 256)
-        assert result.img_info.img_shape == (128, 256)
-        assert result.img_info.ori_shape == (64, 128)
-
-    def test_compose_with_mixed_transforms(self, basic_entity: DetectionSample) -> None:
-        """Test Compose with both native and OTX transforms."""
-        transform = Compose(
-            [
-                Resize(scale=(128, 256), is_numpy_to_tvtensor=True),
-                tvt_v2.RandomHorizontalFlip(p=1.0),
-                Pad(size=(140, 300), is_numpy_to_tvtensor=True),
-            ]
-        )
-
-        result = transform(deepcopy(basic_entity))
-
-        assert result is not None
-        assert result.image.shape[1:] == (140, 300)
-        assert result.img_info.img_shape == (140, 300)
-        assert result.img_info.ori_shape == (64, 128)
-
-    def test_compose_native_transform_image_only(self, basic_entity: DetectionSample) -> None:
-        """Test that native transforms only affect image when appropriate."""
-        original_bboxes = basic_entity.bboxes.clone()
-        original_label = basic_entity.label.clone()
-
-        transform = Compose(
-            [
-                tvt_v2.ColorJitter(brightness=0.5, contrast=0.5),
-            ]
-        )
-
-        result = transform(deepcopy(basic_entity))
-
-        # Bboxes and labels should remain unchanged
-        assert torch.equal(result.bboxes, original_bboxes)
-        assert torch.equal(result.label, original_label)
-        assert result.img_info.ori_shape == (64, 128)
-        assert result.img_info.img_shape == (64, 128)
-
-    def test_compose_native_geometric_transform(self, entity_with_masks: InstanceSegmentationSample) -> None:
-        """Test native geometric transforms affect both image and annotations."""
-        transform = Compose(
-            [
-                tvt_v2.Resize(size=(128, 256)),
-            ]
-        )
-
-        result = transform(deepcopy(entity_with_masks))
-
-        assert result.image.shape[1:] == (128, 256)
-        assert result.bboxes.canvas_size == (128, 256)
-        assert result.img_info.img_shape == (128, 256)
-        assert result.img_info.ori_shape == (64, 128)
-        assert result.masks.shape[1:] == (128, 256)
-
-    def test_compose_returns_none_on_empty_crop(self, basic_entity: DetectionSample) -> None:
-        """Test that Compose properly handles None returns from transforms."""
-        # Create entity with bbox that won't survive crop
-        entity = deepcopy(basic_entity)
-        entity.bboxes = tv_tensors.BoundingBoxes(
-            data=torch.Tensor([[0, 0, 5, 5]]),
-            format="xyxy",
-            canvas_size=(64, 128),
-        )
-
-        transform = Compose(
-            [
-                RandomCrop(
-                    crop_size=(10, 10),
-                    allow_negative_crop=False,
-                    is_numpy_to_tvtensor=False,
-                ),
-            ]
-        )
-
-        result = transform(entity)
-        # Result might be None if crop doesn't contain bbox
-        assert result is None
-
-    def test_compose_img_info_update_with_resize(self, basic_entity: DetectionSample) -> None:
-        """Test img_info is properly updated with native Resize."""
-        original_shape = basic_entity.img_info.img_shape
-        target_size = (96, 192)
-
-        transform = Compose(
-            [
-                tvt_v2.Resize(size=target_size),
-            ]
-        )
-
-        result = transform(deepcopy(basic_entity))
-
-        # Check img_info is updated
-        assert result.img_info.img_shape == target_size
-        assert result.img_info.ori_shape == original_shape
-        assert result.img_info.scale_factor == (
-            target_size[0] / original_shape[0],
-            target_size[1] / original_shape[1],
-        )
-        assert result.image.shape[1:] == target_size
-
-    def test_compose_img_info_update_with_crop(self, basic_entity: DetectionSample) -> None:
-        """Test img_info is properly updated with native RandomCrop."""
-        # Use a large enough crop to ensure it contains the bbox
-        crop_size = (50, 100)
-
-        transform = Compose(
-            [
-                tvt_v2.RandomCrop(size=crop_size),
-            ]
-        )
-
-        result = transform(deepcopy(basic_entity))
-
-        # Check img_info reflects crop
-        assert result.img_info.img_shape == crop_size
-        assert result.image.shape[1:] == crop_size
-
-    def test_compose_img_info_with_padding(self, basic_entity: DetectionSample) -> None:
-        """Test img_info.padding is set correctly with Pad transform."""
-        target_size = (100, 200)
-
-        transform = Compose(
-            [
-                Pad(size=target_size, is_numpy_to_tvtensor=True),
-            ]
-        )
-
-        result = transform(deepcopy(basic_entity))
-
-        # Check padding info
-        assert hasattr(result.img_info, "padding")
-        assert result.img_info.img_shape == target_size
-        assert result.image.shape[1:] == target_size
-
-    def test_compose_img_info_chained_transforms(self, basic_entity: DetectionSample) -> None:
-        """Test img_info updates correctly through multiple transforms."""
-        transform = Compose(
-            [
-                Resize(scale=(100, 200), is_numpy_to_tvtensor=True),
-                Pad(size=(120, 240), is_numpy_to_tvtensor=True),
-                tvt_v2.Resize(size=(80, 160)),
-            ]
-        )
-
-        result = transform(deepcopy(basic_entity))
-
-        # Final img_info should reflect last transform
-        assert result.img_info.img_shape == (80, 160)
-        assert result.image.shape[1:] == (80, 160)
-        assert result.img_info.ori_shape == (64, 128)
-        assert result.img_info.scale_factor == (
-            80 / 64,
-            160 / 128,
-        )
-
-    def test_compose_label_key_mapping(self, basic_entity: DetectionSample) -> None:
-        """Test that 'label' is correctly mapped to 'labels' for native transforms."""
-        # Create a transform that requires labels key
-        transform = Compose(
-            [
-                tvt_v2.Resize(size=(128, 256)),
-            ]
-        )
-
-        result = transform(deepcopy(basic_entity))
-
-        # Label should still be accessible as 'label' attribute
-        assert hasattr(result, "label")
-        assert hasattr(result, "bboxes")
-        assert torch.equal(result.label, basic_entity.label)
-        assert result.bboxes.shape[0] == basic_entity.bboxes.shape[0]
-        assert result.bboxes.canvas_size == (128, 256)
-
-    def test_compose_classification(self) -> None:
-        """Test fast path when only image is present."""
-        # Create entity with only image
-        entity = ClassificationSample(
-            image=tv_tensors.Image(torch.randint(low=0, high=256, size=(3, 64, 128), dtype=torch.uint8)),
-            dm_image_info=DmImageInfo(height=64, width=128),
-            label=torch.LongTensor([0]),
-        )
-
-        transform = Compose(
-            [
-                tvt_v2.ColorJitter(brightness=0.5),
-                tvt_v2.RandomHorizontalFlip(p=1.0),
-                tvt_v2.Resize(size=(224, 224)),
-            ]
-        )
-
-        result = transform(entity)
-
-        assert result.image.shape == (3, 224, 224)
-        assert result.img_info.img_shape == (224, 224)
-        assert result.img_info.ori_shape == (64, 128)
-        assert result.label.shape == entity.label.shape
-        assert torch.equal(result.label, entity.label)
-
-    def test_compose_preserves_non_transformable_attrs(self, basic_entity: DetectionSample) -> None:
-        """Test that non-transformable attributes are preserved."""
-        # Add custom attribute
-        basic_entity.custom_attr = "test_value"
-
-        transform = Compose(
-            [
-                tvt_v2.Resize(size=(128, 256)),
-            ]
-        )
-
-        result = transform(deepcopy(basic_entity))
-
-        # Custom attribute should be preserved
-        assert hasattr(result, "custom_attr")
-        assert result.custom_attr == "test_value"
-
-    def test_compose_native_transform_with_multiple_inputs(self, entity_with_masks: InstanceSegmentationSample) -> None:
-        """Test native transform handles multiple transformable inputs correctly."""
-        transform = Compose(
-            [
-                tvt_v2.Resize(size=(100, 200)),
-                tvt_v2.RandomHorizontalFlip(p=1.0),
-            ]
-        )
-
-        result = transform(deepcopy(entity_with_masks))
-
-        # All transformable fields should be transformed
-        assert result.image.shape[1:] == (100, 200)
-        assert result.bboxes.canvas_size == (100, 200)
-        assert result.masks.shape[1:] == (100, 200)
-
-
-class TestRandomGaussianNoise:
-    def test_transform(self, det_data_entity) -> None:
-        transform = Compose(
-            [
-                ToDtype(torch.float32),
-                RandomGaussianNoise(mean=0.1, sigma=0.2, clip=True),
-            ],
-        )
-
-        new_det_data_entity = deepcopy(det_data_entity)
-        # test unscaled image in range [0, 255]
-        result = transform(new_det_data_entity)
-        assert not torch.all((result.image >= 0) & (result.image <= 1))
-        assert torch.all((result.image >= 0) & (result.image <= 255))
-
-        # test scaled image in range [0, 1]
-        new_image = torch.rand((3, 100, 100))
-        new_det_data_entity.image = new_image
-        result = transform(new_det_data_entity)
-        assert torch.all((result.image >= 0) & (result.image <= 1))
diff --git a/library/tests/unit/data/transform_libs/test_utils.py b/library/tests/unit/data/transform_libs/test_utils.py
deleted file mode 100644
index bcff4c59838..00000000000
--- a/library/tests/unit/data/transform_libs/test_utils.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-from __future__ import annotations
-
-from copy import deepcopy
-
-import numpy as np
-import pytest
-import torch
-from torch import Tensor
-
-from otx.data.transform_libs.utils import get_image_shape, rescale_keypoints, rescale_size, to_np_image
-
-
-@pytest.mark.parametrize(("img", "expected_shape"), [(np.zeros((1, 2, 3)), (1, 2)), (torch.zeros((1, 2, 3)), (2, 3))])
-@pytest.mark.parametrize("is_list", [True, False])
-def test_get_image_shape(img: np.ndarray | Tensor | list, is_list: bool, expected_shape: tuple[int, int]) -> None:
-    if is_list:
-        img = [img, img]
-
-    results = get_image_shape(img)
-
-    assert results == expected_shape
-
-
-@pytest.mark.parametrize("img", [np.zeros((1, 2, 3)), torch.zeros((1, 2, 3))])
-@pytest.mark.parametrize("is_list", [True, False])
-def test_to_np_image(img: np.ndarray | Tensor | list, is_list: bool) -> None:
-    results = to_np_image(img)
-
-    if is_list:
-        assert all(isinstance(r, np.ndarray) for r in results)
-    else:
-        assert isinstance(results, np.ndarray)
-
-
-@pytest.mark.parametrize(
-    ("size", "scale", "expected_size"),
-    [
-        ((100, 200), 0.5, (50, 100)),
-        ((200, 100), 2, (400, 200)),
-        ((200, 100), (300, 300), (300, 150)),
-        ((200, 100), (50, 50), (50, 25)),
-    ],
-)
-def test_rescale_size(size: tuple[int, int], scale: float, expected_size: tuple[int, int]) -> None:
-    results = rescale_size(size, scale)
-
-    assert results == expected_size
-
-
-def test_rescale_keypoints():
-    keypoints = torch.tensor([[10, 20], [30, 40], [50, 60]], dtype=torch.float32)
-
-    # Test with a single float scale factor
-    scale_factor = 2.0
-    rescaled_keypoints = rescale_keypoints(deepcopy(keypoints), scale_factor)
-    expected_keypoints = torch.tensor([[20, 40], [60, 80], [100, 120]], dtype=torch.float32)
-    assert torch.allclose(rescaled_keypoints, expected_keypoints)
-
-    # Test with a tuple scale factor
-    scale_factor = (2.0, 0.5)
-    rescaled_keypoints = rescale_keypoints(deepcopy(keypoints), scale_factor)
-    expected_keypoints = torch.tensor([[5, 40], [15, 80], [25, 120]], dtype=torch.float32)
-    assert torch.allclose(rescaled_keypoints, expected_keypoints)
-
-    # Test with a different tuple scale factor
-    scale_factor = (0.5, 2.0)
-    rescaled_keypoints = rescale_keypoints(deepcopy(keypoints), scale_factor)
-    expected_keypoints = torch.tensor([[20, 10], [60, 20], [100, 30]], dtype=torch.float32)
-    assert torch.allclose(rescaled_keypoints, expected_keypoints)
-
-    # Test with a single float scale factor of 1.0 (no scaling)
-    scale_factor = 1.0
-    rescaled_keypoints = rescale_keypoints(deepcopy(keypoints), scale_factor)
-    expected_keypoints = keypoints
-    assert torch.allclose(rescaled_keypoints, expected_keypoints)
diff --git a/library/tests/unit/metrics/test_detection_threshold_logic.py b/library/tests/unit/metrics/test_detection_threshold_logic.py
index d30bd82544d..39afa2c22ed 100644
--- a/library/tests/unit/metrics/test_detection_threshold_logic.py
+++ b/library/tests/unit/metrics/test_detection_threshold_logic.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2025 Intel Corporation
+# Copyright (C) 2025-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 """Unit tests for detection model confidence threshold logic."""
 
@@ -57,7 +57,7 @@ def detection_model():
 def sample_predictions():
     """Create sample prediction data."""
     return OTXPredictionBatch(
-        images=[torch.rand(3, 416, 416), torch.rand(3, 416, 416)],
+        images=torch.stack([torch.rand(3, 416, 416), torch.rand(3, 416, 416)]),
         imgs_info=[
             ImageInfo(img_idx=0, img_shape=(3, 416, 416), ori_shape=(3, 416, 416)),
             ImageInfo(img_idx=1, img_shape=(3, 416, 416), ori_shape=(3, 416, 416)),
@@ -91,7 +91,7 @@ def sample_predictions():
 def sample_batch():
     """Create sample input batch."""
     return OTXSampleBatch(
-        images=[torch.rand(3, 416, 416), torch.rand(3, 416, 416)],
+        images=torch.stack([torch.rand(3, 416, 416), torch.rand(3, 416, 416)]),
         imgs_info=[
             ImageInfo(img_idx=0, img_shape=(3, 416, 416), ori_shape=(3, 416, 416)),
             ImageInfo(img_idx=1, img_shape=(3, 416, 416), ori_shape=(3, 416, 416)),
@@ -212,7 +212,7 @@ def test_filtering_preserves_tensor_types(self, detection_model, sample_predicti
     def test_filtering_with_none_outputs(self, detection_model):
         """Test filtering when outputs have None values."""
         preds_with_none = OTXPredictionBatch(
-            images=[torch.rand(3, 416, 416)],
+            images=torch.rand(1, 3, 416, 416),
             imgs_info=[ImageInfo(img_idx=0, img_shape=(3, 416, 416), ori_shape=(3, 416, 416))],
             scores=None,
             bboxes=None,
@@ -229,7 +229,7 @@ def test_filtering_with_none_outputs(self, detection_model):
     def test_filtering_empty_predictions(self, detection_model):
         """Test filtering with empty prediction lists."""
         empty_preds = OTXPredictionBatch(
-            images=[torch.rand(3, 416, 416), torch.rand(3, 416, 416)],
+            images=torch.stack([torch.rand(3, 416, 416), torch.rand(3, 416, 416)]),
             imgs_info=[
                 ImageInfo(img_idx=0, img_shape=(3, 416, 416), ori_shape=(3, 416, 416)),
                 ImageInfo(img_idx=1, img_shape=(3, 416, 416), ori_shape=(3, 416, 416)),
@@ -424,7 +424,7 @@ def test_validation_to_test_workflow(self, detection_model, sample_batch):
         ) as mock_convert:
             # Setup sample predictions with scores above and below threshold
             test_preds = OTXPredictionBatch(
-                images=[torch.rand(3, 416, 416)],
+                images=torch.rand(1, 3, 416, 416),
                 imgs_info=[ImageInfo(img_idx=0, img_shape=(3, 416, 416), ori_shape=(3, 416, 416))],
                 scores=[torch.tensor([0.9, 0.5, 0.3])],
                 bboxes=[
diff --git a/library/tests/unit/tools/test_auto_configurator.py b/library/tests/unit/tools/test_auto_configurator.py
index 123fdc9e851..acd3971d4f5 100644
--- a/library/tests/unit/tools/test_auto_configurator.py
+++ b/library/tests/unit/tools/test_auto_configurator.py
@@ -1,11 +1,10 @@
-# Copyright (C) 2024 Intel Corporation
+# Copyright (C) 2024-2026 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
 
 from pathlib import Path
 
 import pytest
-import torch
 
 from otx.backend.native.models.base import DataInputParams, OTXModel
 from otx.data.module import OTXDataModule
@@ -16,7 +15,6 @@
 )
 from otx.types.label import LabelInfo, SegLabelInfo
 from otx.types.task import OTXTaskType
-from otx.types.transformer_libs import TransformLibType
 from otx.utils.utils import should_pass_label_info
 
 
@@ -165,24 +163,9 @@ def test_update_ov_subset_pipeline(self) -> None:
         auto_configurator = AutoConfigurator(data_root=data_root, task="DETECTION")
 
         datamodule = auto_configurator.get_datamodule()
-        assert datamodule.test_subset.transforms == [
-            {
-                "class_path": "otx.data.transform_libs.torchvision.Resize",
-                "init_args": {
-                    "scale": (800, 992),
-                },
-            },
-            {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": torch.float32}},
-            {
-                "class_path": "torchvision.transforms.v2.Normalize",
-                "init_args": {"mean": [0.0, 0.0, 0.0], "std": [255.0, 255.0, 255.0]},
-            },
-        ]
-
-        assert datamodule.test_subset.transform_lib_type == TransformLibType.TORCHVISION
+        # The detection base config has augmentations_cpu with Resize
+        assert any("Resize" in aug.get("class_path", "") for aug in datamodule.test_subset.augmentations_cpu)
 
         updated_datamodule = auto_configurator.update_ov_subset_pipeline(datamodule, subset="test")
-        assert updated_datamodule.test_subset.transforms == [{"class_path": "torchvision.transforms.v2.ToImage"}]
-
-        assert updated_datamodule.test_subset.transform_lib_type == TransformLibType.TORCHVISION
+        assert updated_datamodule.test_subset.augmentations_cpu == [{"class_path": "torchvision.transforms.v2.ToImage"}]
         assert not updated_datamodule.tile_config.enable_tiler
diff --git a/library/tests/unit/tools/test_transforms_updater.py b/library/tests/unit/tools/test_transforms_updater.py
new file mode 100644
index 00000000000..44e68832320
--- /dev/null
+++ b/library/tests/unit/tools/test_transforms_updater.py
@@ -0,0 +1,529 @@
+# Copyright (C) 2024-2026 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""Unit tests for TransformsUpdater and HyperparametersUpdater classes."""
+
+from __future__ import annotations
+
+import pytest
+
+from otx.tools.converter import HyperparametersUpdater, TransformsUpdater
+
+
+class TestTransformsUpdater:
+    """Test TransformsUpdater augmentation mapping and parameter remapping."""
+
+    @pytest.fixture
+    def base_config(self):
+        """Create a base config with augmentations_cpu and augmentations_gpu."""
+        return {
+            "data": {
+                "train_subset": {
+                    "augmentations_cpu": [
+                        {
+                            "class_path": "otx.data.augmentation.transforms.Resize",
+                            "init_args": {"size": "$(input_size)"},
+                        }
+                    ],
+                    "augmentations_gpu": [
+                        {
+                            "class_path": "kornia.augmentation.Normalize",
+                            "init_args": {
+                                "mean": [0.485, 0.456, 0.406],
+                                "std": [0.229, 0.224, 0.225],
+                            },
+                        }
+                    ],
+                },
+                "tile_config": {"enable_tiler": False},
+            }
+        }
+
+    def test_param_rename_simple(self, base_config):
+        """Test simple parameter rename: probability -> p."""
+        aug_params = {
+            "random_horizontal_flip": {
+                "enable": True,
+                "probability": 0.7,
+            }
+        }
+        TransformsUpdater.update(aug_params, base_config)
+
+        gpu_augs = base_config["data"]["train_subset"]["augmentations_gpu"]
+        flip_aug = next(
+            (a for a in gpu_augs if "RandomHorizontalFlip" in a.get("class_path", "")),
+            None,
+        )
+        assert flip_aug is not None
+        assert flip_aug["init_args"]["p"] == 0.7
+
+    def test_param_rename_affine_translate(self, base_config):
+        """Test affine parameter transform: max_translate_ratio -> translate [v, v]."""
+        aug_params = {
+            "random_affine": {
+                "enable": True,
+                "max_rotate_degree": 30.0,
+                "max_translate_ratio": 0.15,
+                "scaling_ratio_range": [0.8, 1.2],
+            }
+        }
+        TransformsUpdater.update(aug_params, base_config)
+
+        gpu_augs = base_config["data"]["train_subset"]["augmentations_gpu"]
+        affine_aug = next(
+            (a for a in gpu_augs if "RandomAffine" in a.get("class_path", "")),
+            None,
+        )
+        assert affine_aug is not None
+        assert affine_aug["init_args"]["degrees"] == 30.0
+        assert affine_aug["init_args"]["translate"] == [0.15, 0.15]
+        assert affine_aug["init_args"]["scale"] == [0.8, 1.2]
+
+    def test_param_rename_affine_shear(self, base_config):
+        """Test affine parameter transform: max_shear_degree -> shear [-v, v]."""
+        aug_params = {
+            "random_affine": {
+                "enable": True,
+                "max_shear_degree": 5.0,
+            }
+        }
+        TransformsUpdater.update(aug_params, base_config)
+
+        gpu_augs = base_config["data"]["train_subset"]["augmentations_gpu"]
+        affine_aug = next(
+            (a for a in gpu_augs if "RandomAffine" in a.get("class_path", "")),
+            None,
+        )
+        assert affine_aug is not None
+        assert affine_aug["init_args"]["shear"] == [-5.0, 5.0]
+
+    def test_param_value_already_list(self, base_config):
+        """Test that list values pass through unchanged."""
+        aug_params = {
+            "color_jitter": {
+                "enable": True,
+                "brightness": [0.875, 1.125],
+                "contrast": [0.5, 1.5],
+            }
+        }
+        TransformsUpdater.update(aug_params, base_config)
+
+        gpu_augs = base_config["data"]["train_subset"]["augmentations_gpu"]
+        jitter_aug = next(
+            (a for a in gpu_augs if "ColorJiggle" in a.get("class_path", "")),
+            None,
+        )
+        assert jitter_aug is not None
+        assert jitter_aug["init_args"]["brightness"] == [0.875, 1.125]
+        assert jitter_aug["init_args"]["contrast"] == [0.5, 1.5]
+
+    def test_kernel_size_scalar_to_list(self, base_config):
+        """Test kernel_size transforms int scalar to [v, v]."""
+        aug_params = {
+            "gaussian_blur": {
+                "enable": True,
+                "kernel_size": 5,
+            }
+        }
+        TransformsUpdater.update(aug_params, base_config)
+
+        gpu_augs = base_config["data"]["train_subset"]["augmentations_gpu"]
+        blur_aug = next(
+            (a for a in gpu_augs if "RandomGaussianBlur" in a.get("class_path", "")),
+            None,
+        )
+        assert blur_aug is not None
+        assert blur_aug["init_args"]["kernel_size"] == [5, 5]
+
+    def test_add_new_augmentation_cpu(self, base_config):
+        """Test adding new CPU augmentation when not present in config."""
+        aug_params = {
+            "random_resize_crop": {
+                "enable": True,
+                "scale": [0.1, 1.0],
+            }
+        }
+        TransformsUpdater.update(aug_params, base_config)
+
+        cpu_augs = base_config["data"]["train_subset"]["augmentations_cpu"]
+        crop_aug = next(
+            (a for a in cpu_augs if "RandomResizedCrop" in a.get("class_path", "")),
+            None,
+        )
+        assert crop_aug is not None
+        assert crop_aug["init_args"]["scale"] == [0.1, 1.0]
+
+    def test_add_new_augmentation_gpu(self, base_config):
+        """Test adding new GPU augmentation when not present in config."""
+        aug_params = {
+            "gaussian_noise": {
+                "enable": True,
+                "mean": 0.0,
+                "sigma": 0.05,
+            }
+        }
+        TransformsUpdater.update(aug_params, base_config)
+
+        gpu_augs = base_config["data"]["train_subset"]["augmentations_gpu"]
+        noise_aug = next(
+            (a for a in gpu_augs if "RandomGaussianNoise" in a.get("class_path", "")),
+            None,
+        )
+        assert noise_aug is not None
+        assert noise_aug["init_args"]["mean"] == 0.0
+        assert noise_aug["init_args"]["std"] == 0.05  # sigma renamed to std
+
+    def test_update_existing_augmentation(self, base_config):
+        """Check that an already-present flip augmentation gets its probability overwritten."""
+        gpu_pipeline = base_config["data"]["train_subset"]["augmentations_gpu"]
+
+        # Seed the pipeline with a flip entry so the updater has something to modify.
+        existing_flip = {
+            "class_path": "kornia.augmentation.RandomHorizontalFlip",
+            "init_args": {"p": 0.5},
+        }
+        gpu_pipeline.insert(0, existing_flip)
+
+        aug_params = {
+            "random_horizontal_flip": {"enable": True, "probability": 0.9},
+        }
+        TransformsUpdater.update(aug_params, base_config)
+
+        # Look the list up again from the config and stop at the first flip entry.
+        flip_entry = None
+        for candidate in base_config["data"]["train_subset"]["augmentations_gpu"]:
+            if "RandomHorizontalFlip" in candidate.get("class_path", ""):
+                flip_entry = candidate
+                break
+
+        assert flip_entry is not None
+        # "probability" in the request is expected under the "p" init arg.
+        assert flip_entry["init_args"]["p"] == 0.9
+
+    def test_disable_augmentation(self, base_config):
+        """Check that enable=False removes an augmentation that is already in the pipeline."""
+        # Seed the GPU pipeline with the entry that the updater is expected to drop.
+        seeded_flip = {
+            "class_path": "kornia.augmentation.RandomVerticalFlip",
+            "init_args": {"p": 0.5},
+        }
+        gpu_pipeline = base_config["data"]["train_subset"]["augmentations_gpu"]
+        gpu_pipeline.insert(0, seeded_flip)
+
+        aug_params = {
+            "random_vertical_flip": {"enable": False},
+        }
+        TransformsUpdater.update(aug_params, base_config)
+
+        # Scan for any leftover vertical-flip entry; none should survive the update.
+        leftover = None
+        for candidate in base_config["data"]["train_subset"]["augmentations_gpu"]:
+            if "RandomVerticalFlip" in candidate.get("class_path", ""):
+                leftover = candidate
+                break
+
+        assert leftover is None
+
+    def test_disable_random_resize_crop_replaces_with_resize(self, base_config):
+        """Test that disabling random_resize_crop replaces it with Resize."""
+        # Seed the CPU pipeline with the crop entry that the updater is expected to swap out.
+        base_config["data"]["train_subset"]["augmentations_cpu"].insert(
+            0,
+            {
+                "class_path": "torchvision.transforms.v2.RandomResizedCrop",
+                "init_args": {"size": "$(input_size)"},
+            },
+        )
+
+        aug_params = {"random_resize_crop": {"enable": False}}
+        TransformsUpdater.update(aug_params, base_config)
+
+        cpu_augs = base_config["data"]["train_subset"]["augmentations_cpu"]
+        resize_aug = next(
+            (a for a in cpu_augs if a["class_path"] == "otx.data.augmentation.transforms.Resize"),
+            None,
+        )
+        assert resize_aug is not None
+
+        # Match by substring: the entries hold fully qualified class paths, so testing the
+        # bare name "RandomResizedCrop" for list membership could never find the seeded entry.
+        assert not any("RandomResizedCrop" in a.get("class_path", "") for a in cpu_augs)
+
+    def test_insert_position_gpu_before_normalize(self, base_config):
+        """Check that a newly added GPU augmentation lands ahead of the Normalize step."""
+        aug_params = {
+            "color_jitter": {"enable": True, "brightness": 0.2},
+        }
+        TransformsUpdater.update(aug_params, base_config)
+        gpu_pipeline = base_config["data"]["train_subset"]["augmentations_gpu"]
+
+        def first_index(marker):
+            """Return the position of the first entry whose class path contains marker, else None."""
+            for position, entry in enumerate(gpu_pipeline):
+                if marker in entry.get("class_path", ""):
+                    return position
+            return None
+
+        normalize_at = first_index("Normalize")
+        jitter_at = first_index("ColorJiggle")
+
+        # Both transforms must exist before their relative order is meaningful.
+        assert jitter_at is not None
+        assert normalize_at is not None
+        assert jitter_at < normalize_at
+
+    def test_insert_position_cpu_before_resize(self, base_config):
+        """Check that a newly added CPU augmentation lands ahead of the Resize step."""
+        aug_params = {
+            "iou_random_crop": {"enable": True},
+        }
+        TransformsUpdater.update(aug_params, base_config)
+        cpu_pipeline = base_config["data"]["train_subset"]["augmentations_cpu"]
+
+        def first_index(matches):
+            """Return the position of the first entry accepted by matches, else None."""
+            for position, entry in enumerate(cpu_pipeline):
+                if matches(entry):
+                    return position
+            return None
+
+        def is_plain_resize(entry):
+            # "RandomResizedCrop" contains "Resize" as a substring, so exclude it explicitly.
+            class_path = entry.get("class_path", "")
+            return "Resize" in class_path and "RandomResizedCrop" not in class_path
+
+        def is_iou_crop(entry):
+            class_path = entry.get("class_path", "")
+            return "MinIoURandomCrop" in class_path or "RandomIoUCrop" in class_path
+
+        resize_at = first_index(is_plain_resize)
+        crop_at = first_index(is_iou_crop)
+        assert crop_at is not None
+        assert resize_at is not None
+        assert crop_at < resize_at
+
+    def test_unknown_augmentation_raises_error(self, base_config):
+        """Check that an unrecognised augmentation name is rejected with a ValueError."""
+        unsupported_request = {"unknown_aug": {"enable": True}}
+        expected_message = "Unknown augmentation"
+
+        # match is applied as a regex search on the exception message (pytest.raises semantics).
+        rejection = pytest.raises(ValueError, match=expected_message)
+        with rejection:
+            TransformsUpdater.update(unsupported_request, base_config)
+
+    def test_empty_augmentation_params(self, base_config):
+        """Check that an empty request leaves both augmentation lists at their original length."""
+        pipeline_keys = ("augmentations_gpu", "augmentations_cpu")
+        lengths_before = [len(base_config["data"]["train_subset"][key]) for key in pipeline_keys]
+
+        TransformsUpdater.update({}, base_config)
+
+        # Only the list sizes are compared, once per pipeline, before and after the call.
+        lengths_after = [len(base_config["data"]["train_subset"][key]) for key in pipeline_keys]
+        assert lengths_after == lengths_before
+
+    def test_none_param_values_are_skipped(self, base_config):
+        """Check that a parameter given as None is not written into the augmentation's init args."""
+        aug_params = {
+            # "contrast" is deliberately None and should be skipped by the updater.
+            "color_jitter": {"enable": True, "brightness": 0.2, "contrast": None},
+        }
+        TransformsUpdater.update(aug_params, base_config)
+
+        # Locate the first color-jitter entry in the GPU pipeline.
+        jitter_entry = None
+        for candidate in base_config["data"]["train_subset"]["augmentations_gpu"]:
+            if "ColorJiggle" in candidate.get("class_path", ""):
+                jitter_entry = candidate
+                break
+
+        assert jitter_entry is not None
+        written_args = jitter_entry["init_args"]
+        assert "brightness" in written_args
+        assert "contrast" not in written_args
+
+    def test_multiple_augmentations(self, base_config):
+        """Check that one update call can add several GPU augmentations together."""
+        aug_params = {
+            "random_affine": {"enable": True, "max_rotate_degree": 45.0},
+            "color_jitter": {"enable": True, "brightness": 0.3},
+            "random_horizontal_flip": {"enable": True, "probability": 0.6},
+        }
+        TransformsUpdater.update(aug_params, base_config)
+
+        gpu_pipeline = base_config["data"]["train_subset"]["augmentations_gpu"]
+        class_paths = [entry.get("class_path", "") for entry in gpu_pipeline]
+
+        def is_present(marker):
+            """Return True when any collected class path contains marker."""
+            for class_path in class_paths:
+                if marker in class_path:
+                    return True
+            return False
+
+        # One assertion per requested augmentation.
+        assert is_present("RandomAffine")
+        assert is_present("ColorJiggle")
+        assert is_present("RandomHorizontalFlip")
+
+    def test_update_tiling_enabled(self, base_config):
+        """Check that update_tiling writes an enabled tiling request into data.tile_config."""
+        tiling_dict = dict(enable=True, adaptive_tiling=True, tile_size=800, tile_overlap=0.2)
+        TransformsUpdater.update_tiling(tiling_dict, base_config)
+
+        tile_config = base_config["data"]["tile_config"]
+
+        # The two switches must be the literal True, not merely truthy.
+        assert tile_config["enable_tiler"] is True
+        assert tile_config["enable_adaptive_tiling"] is True
+
+        # The scalar tile_size of the request is expected as the pair (800, 800).
+        assert tile_config["tile_size"] == (800, 800)
+        assert tile_config["overlap"] == 0.2
+
+    def test_update_tiling_disabled(self, base_config):
+        """Check that update_tiling with enable=False leaves the tiler switched off."""
+        tiling_dict = dict(
+            enable=False,
+            adaptive_tiling=False,
+            tile_size=0,
+            tile_overlap=0.0,
+        )
+        TransformsUpdater.update_tiling(tiling_dict, base_config)
+        tile_config = base_config["data"]["tile_config"]
+        assert tile_config["enable_tiler"] is False
+
+    def test_update_tiling_none(self, base_config):
+        """Check that passing None as the tiling request leaves enable_tiler as it was."""
+        tiler_before = base_config["data"]["tile_config"].get("enable_tiler", False)
+        TransformsUpdater.update_tiling(None, base_config)
+        tiler_after = base_config["data"]["tile_config"].get("enable_tiler", False)
+        assert tiler_after == tiler_before
+
+
+class TestHyperparametersUpdater:
+    """Test HyperparametersUpdater for training hyperparameter updates."""
+
+    @pytest.fixture
+    def base_config(self):
+        """Create a base config with training-related sections."""
+        return {
+            "data": {
+                "input_size": (640, 640),
+                "train_subset": {
+                    "batch_size": 32,
+                },
+                "val_subset": {
+                    "batch_size": 32,
+                },
+            },
+            "model": {
+                "init_args": {
+                    "optimizer": {
+                        "init_args": {
+                            "lr": 0.001,
+                        }
+                    }
+                }
+            },
+            "max_epochs": 50,
+            "callbacks": [
+                {
+                    "class_path": "otx.backend.native.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup",
+                    "init_args": {
+                        "patience": 10,
+                    },
+                },
+            ],
+        }
+
+    def test_update_learning_rate(self, base_config):
+        """Test updating learning rate."""
+        HyperparametersUpdater.update({"learning_rate": 0.0001}, base_config)
+
+        assert base_config["model"]["init_args"]["optimizer"]["init_args"]["lr"] == 0.0001
+
+    def test_update_batch_size(self, base_config):
+        """Test updating batch size for train and val."""
+        HyperparametersUpdater.update({"batch_size": 16}, base_config)
+
+        assert base_config["data"]["train_subset"]["batch_size"] == 16
+        assert base_config["data"]["val_subset"]["batch_size"] == 16
+
+    def test_update_max_epochs(self, base_config):
+        """Test updating max epochs."""
+        HyperparametersUpdater.update({"max_epochs": 200}, base_config)
+
+        assert base_config["max_epochs"] == 200
+
+    def test_update_input_size(self, base_config):
+        """Test updating input size."""
+        HyperparametersUpdater.update({"input_size": (512, 512)}, base_config)
+
+        assert base_config["data"]["input_size"] == (512, 512)
+
+    def test_update_early_stopping_disable(self, base_config):
+        """Test disabling early stopping."""
+        HyperparametersUpdater.update(
+            {"early_stopping": {"enable": False, "patience": 10}},
+            base_config,
+        )
+
+        # Callback should be removed
+        assert not any("EarlyStoppingWithWarmup" in cb.get("class_path", "") for cb in base_config["callbacks"])
+
+    def test_update_early_stopping_enable(self, base_config):
+        """Test updating early stopping patience."""
+        HyperparametersUpdater.update(
+            {"early_stopping": {"enable": True, "patience": 20}},
+            base_config,
+        )
+
+        callback = next(
+            (cb for cb in base_config["callbacks"] if "EarlyStoppingWithWarmup" in cb.get("class_path", "")),
+            None,
+        )
+        assert callback is not None
+        assert callback["init_args"]["patience"] == 20
+
+    def test_update_multiple_hyperparams(self, base_config):
+        """Test updating multiple hyperparameters at once."""
+        HyperparametersUpdater.update(
+            {
+                "learning_rate": 0.0005,
+                "batch_size": 64,
+                "max_epochs": 150,
+                "input_size": (768, 768),
+            },
+            base_config,
+        )
+
+        assert base_config["model"]["init_args"]["optimizer"]["init_args"]["lr"] == 0.0005
+        assert base_config["data"]["train_subset"]["batch_size"] == 64
+        assert base_config["data"]["val_subset"]["batch_size"] == 64
+        assert base_config["max_epochs"] == 150
+        assert base_config["data"]["input_size"] == (768, 768)
+
+    def test_update_with_none_values(self, base_config):
+        """Test that None values are skipped."""
+        # Snapshot the leaf values before the update. A shallow dict(base_config) copy
+        # would share the nested dicts with base_config, so a comparison against it
+        # would hold even if the updater had modified the config.
+        original_lr = base_config["model"]["init_args"]["optimizer"]["init_args"]["lr"]
+        original_batch_size = base_config["data"]["train_subset"]["batch_size"]
+        original_input_size = base_config["data"]["input_size"]
+
+        HyperparametersUpdater.update(
+            {
+                "learning_rate": None,
+                "batch_size": None,
+                "input_size": (None, None),
+            },
+            base_config,
+        )
+
+        # Config should remain unchanged
+        assert base_config["model"]["init_args"]["optimizer"]["init_args"]["lr"] == original_lr
+        assert base_config["data"]["train_subset"]["batch_size"] == original_batch_size
+        assert base_config["data"]["input_size"] == original_input_size
diff --git a/library/uv.lock b/library/uv.lock
index 39be2702380..331b38f369f 100644
--- a/library/uv.lock
+++ b/library/uv.lock
@@ -832,7 +832,7 @@ wheels = [
 [[package]]
 name = "datumaro"
 version = "2.0.0"
-source = { git = "https://github.com/open-edge-platform/datumaro.git?rev=develop#cd8e4cbf9b577bf51ddcf10aaa5b926d94f82294" }
+source = { git = "https://github.com/open-edge-platform/datumaro.git?rev=develop#86aa54056927048a5424956379cb93d2680e71b9" }
 dependencies = [
     { name = "attrs" },
     { name = "cachetools" },
@@ -1864,6 +1864,61 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/0a/dd/8050c947d435c8d4bc94e3252f4d8bb8a76cfb424f043a8680be637a57f1/kiwisolver-1.5.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:59cd8683f575d96df5bb48f6add94afc055012c29e28124fcae2b63661b9efb1", size = 73558, upload-time = "2026-03-09T13:15:52.112Z" },
 ]
 
+[[package]]
+name = "kornia"
+version = "0.8.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "kornia-rs" },
+    { name = "packaging" },
+    { name = "torch", version = "2.10.0", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform != 'linux' and sys_platform != 'win32' and extra == 'extra-3-otx-cpu') or (sys_platform == 'linux' and extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (sys_platform == 'linux' and extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (sys_platform == 'win32' and extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (sys_platform == 'win32' and extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra != 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra != 'extra-3-otx-cpu' and extra != 'extra-3-otx-cuda' and extra != 'extra-3-otx-xpu')" },
+    { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(sys_platform == 'linux' and extra == 'extra-3-otx-cpu') or (sys_platform == 'win32' and extra == 'extra-3-otx-cpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu')" },
+    { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "extra == 'extra-3-otx-cuda' or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu')" },
+    { name = "torch", version = "2.10.0+xpu", source = { registry = "https://download.pytorch.org/whl/xpu" }, marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra != 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra != 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c6/e6/45e757d4924176e4d4e111e10effaab7db382313243e0188a06805010073/kornia-0.8.2.tar.gz", hash = "sha256:5411b2ce0dd909d1608016308cd68faeef90f88c47f47e8ecd40553fd4d8b937", size = 667151, upload-time = "2025-11-08T12:10:03.042Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/79/d4/e9bd12b7b4cbd23b4dfb47e744ee1fa54d6d9c3c9bc406ec86c1be8c8307/kornia-0.8.2-py2.py3-none-any.whl", hash = "sha256:32dfe77c9c74a87a2de49395aa3c2c376a1b63c27611a298b394d02d13905819", size = 1095012, upload-time = "2025-11-08T12:10:01.226Z" },
+]
+
+[[package]]
+name = "kornia-rs"
+version = "0.1.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ab/17/8b3518ece01512a575b18f86b346879793d3dea264b314796bbd44d42e11/kornia_rs-0.1.10.tar.gz", hash = "sha256:5fd3fbc65240fa751975f5870b079f98e7fdcaa2885ea577b3da324d8bf01d81", size = 145610, upload-time = "2025-11-08T11:29:32.399Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/20/25/ab91a87cefd8d92a10749fa5d923366dfd2a2d240d9e57260e4218e9a5af/kornia_rs-0.1.10-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6757940733f13c52c4f142b9b11e3e9bd12ef9d209e333300602e86e21f5ae2f", size = 2811949, upload-time = "2025-11-08T11:30:19.768Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/61/6125a970249e04dd31cf3edf3fb0ceb98ea65269bc416ba48fd70f9a8f5e/kornia_rs-0.1.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:68e90101a34ba2bbce920332b25fd4d25c8c546d9a241b2606a6d886df2dd1ed", size = 2078639, upload-time = "2025-11-08T11:30:06.363Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/e4/c3484e5921a08e6368f0565c30646741fd12b46cb45c962d519cac3d12ad/kornia_rs-0.1.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b0adb81858a8963455f2f0da01fcd6ea3296147b918306488edeeaf6bc2a979", size = 2204722, upload-time = "2025-11-08T11:29:33.566Z" },
+    { url = "https://files.pythonhosted.org/packages/93/a4/2e6e33da900f19ae6411bfad41d317e56f1ae4f204bd73e61f0881bd5418/kornia_rs-0.1.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c3e237a8428524ad9f86599c0c47b355bc3007669fe297ea3fbd59cd64bc2f7", size = 3042890, upload-time = "2025-11-08T11:29:50.15Z" },
+    { url = "https://files.pythonhosted.org/packages/40/48/5e171c98b742139bebd1bd593d768e3c045f824bf0ae14190b63f0ac0acc/kornia_rs-0.1.10-cp311-cp311-win_amd64.whl", hash = "sha256:1d300ea6d4666e47302fba6cc438556d91e37ce41caf291a9a04a8f74c231d0b", size = 2544572, upload-time = "2025-11-08T11:30:32.32Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/6c/8248f08c90a10d6b8ca2e74783da8df7fa509f46b64a3b4fbb7dd0ac4e9c/kornia_rs-0.1.10-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f0809277e51156d59be3c39605ba9659e94f7a4cf3b0b6c035ec2f06f6067881", size = 2811606, upload-time = "2025-11-08T11:30:21.346Z" },
+    { url = "https://files.pythonhosted.org/packages/83/dc/29e5710cbc5d01c155ee1fd7621db48b94378a7ae394741bb34a6bfb36d9/kornia_rs-0.1.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8ecf2ba0291cc1bb178073d56e46b16296a8864a20272b63af02ee88771cb574", size = 2076141, upload-time = "2025-11-08T11:30:07.527Z" },
+    { url = "https://files.pythonhosted.org/packages/68/f7/0b3e90b9d0a25e6211c7ac9fa1dfed4db1306a812c359ee49678390a1bdc/kornia_rs-0.1.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d874ca12dd58871f9849672d9bf9fa998398470a88b52d61223ce2133b196662", size = 2205562, upload-time = "2025-11-08T11:29:35.353Z" },
+    { url = "https://files.pythonhosted.org/packages/63/d4/315f358b2a2c29d9af3a73f3d1973c2fd8e0cdeb65a57af98643e66fa7c8/kornia_rs-0.1.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f332a2a034cc791006f25c2d85e342a060887145e9236e8e43562badcadededf", size = 3042197, upload-time = "2025-11-08T11:29:51.614Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/b8/0ddbdf1d35fec3ef24f5b8cc29eb633ce5ce16c94c9fb090408c1280abe9/kornia_rs-0.1.10-cp312-cp312-win_amd64.whl", hash = "sha256:34111ce1c8abe930079b4b0aeb8d372f876c621a867ed03f77181de685e71a8f", size = 2539656, upload-time = "2025-11-08T11:30:33.908Z" },
+    { url = "https://files.pythonhosted.org/packages/90/01/1d658b11635431f8c31f416c90ca99befdc1f4fdd20e91a05b480b9c0ea8/kornia_rs-0.1.10-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:950a943f91c2cff94d80282886b0d48bbc15ef4a7cc4b15ac819724dfdb2f414", size = 2811810, upload-time = "2025-11-08T11:30:22.497Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/ed/bd970ded1d819557cc33055d982b1847eb385151ea5b0c915c16ed74f5c0/kornia_rs-0.1.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:63b802aaf95590276d3426edc6d23ff11caf269d2bc2ec37cb6c679b7b2a8ee0", size = 2076195, upload-time = "2025-11-08T11:30:08.726Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/10/afd700455105fdba5b043d724f3a65ca36259b89c736a3b71d5a03103808/kornia_rs-0.1.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38087da7cdf2bffe10530c0d53335dd1fc107fae6521f2dd4797c6522b6d11b3", size = 2205781, upload-time = "2025-11-08T11:29:36.8Z" },
+    { url = "https://files.pythonhosted.org/packages/25/16/ec8dc3ce1d79660ddd6a186a77037e0c3bf61648e6c72250280b648fb291/kornia_rs-0.1.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa3464de8f9920d87415721c36840ceea23e054dcb54dd9f69189ba9eabce0c7", size = 3042272, upload-time = "2025-11-08T11:29:52.936Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/75/62785aba777d35a562a97a987d65840306fab7a8ecd2d928dd8ac779e29b/kornia_rs-0.1.10-cp313-cp313-win_amd64.whl", hash = "sha256:c57d157bebe64c22e2e44c72455b1c7365eee4d767e0c187dc28f22d072ebaf7", size = 2539802, upload-time = "2025-11-08T11:30:35.753Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/d5/32b23d110109eb77b2dc952be75411f7e495da9105058e2cb08924a9cc90/kornia_rs-0.1.10-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:0b375f02422ef5986caed612799b4ddcc91f57f303906868b0a8c397a17e7607", size = 2810244, upload-time = "2025-11-08T11:30:23.637Z" },
+    { url = "https://files.pythonhosted.org/packages/96/5f/5ecde42b7c18e7df26c413848a98744427c3d370f5eed725b65f0bc356fb/kornia_rs-0.1.10-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f2bcfa438d6b5dbe07d573afc980f2871f6639b2eac5148b8c0bba4f82357b9a", size = 2074220, upload-time = "2025-11-08T11:30:09.972Z" },
+    { url = "https://files.pythonhosted.org/packages/18/6c/6fc86eb855bcc723924c3b91de98dc6c0f381987ce582e080b8eade3bc88/kornia_rs-0.1.10-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:021b0a02b2356b12b3954a298f369ed4fe2dd522dcf8b6d72f91bf3bd8eea201", size = 2204672, upload-time = "2025-11-08T11:29:38.777Z" },
+    { url = "https://files.pythonhosted.org/packages/19/26/3ac706d1b36761c0f7a36934327079adcb42d761c8c219865123d49fc1b2/kornia_rs-0.1.10-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d9b07e2ae79e423b3248d94afd092e324c5ddfe3157fafc047531cc8bffa6a3", size = 3042797, upload-time = "2025-11-08T11:29:54.719Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/f4/d62728d86bc67f5516249b154ff0bdfcf38a854dae284ff0ce62da87af99/kornia_rs-0.1.10-cp313-cp313t-win_amd64.whl", hash = "sha256:b80a037e34d63cb021bcd5fc571e41aff804a2981311f66e883768c6b8e5f8de", size = 2543855, upload-time = "2025-11-08T11:30:37.437Z" },
+    { url = "https://files.pythonhosted.org/packages/91/d5/8ed1288a51d2ad71a6c01152ceccdd2d92f21692dfd2304b1ae9383496fa/kornia_rs-0.1.10-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:119eb434d1384257cae6c1ee9444e1aa1b0fda617f6d5a79fef3f145fdac70ac", size = 2809873, upload-time = "2025-11-08T11:30:24.958Z" },
+    { url = "https://files.pythonhosted.org/packages/54/2b/fd5f919723aaa69ec5c1e60b10b7904a9126be5b9d6ccc0267fa42ca77e0/kornia_rs-0.1.10-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:60bca692911e5969e51d256299ecc6e90d32b9a2c5bf7bd1c7eb8f096cb9234b", size = 2074360, upload-time = "2025-11-08T11:30:11.327Z" },
+    { url = "https://files.pythonhosted.org/packages/43/ec/7987aa5fb7d188180866bd8dafa5bb5b1f00a74ba738bb4e2abe63c589ac/kornia_rs-0.1.10-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61f126644f49ff9947d9402126edacfeeb4b47c0999a7af487d27ce4fc4cbc2a", size = 2206111, upload-time = "2025-11-08T11:29:40.608Z" },
+    { url = "https://files.pythonhosted.org/packages/91/08/cb73b7e87a07b2af1146988d159d48722f0a28f550f920397c8964ab7c19/kornia_rs-0.1.10-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:614aeffdb1d39c4041ace0e0fc318b36eb211f6c133912984e946174e67dbb42", size = 3041436, upload-time = "2025-11-08T11:29:55.984Z" },
+    { url = "https://files.pythonhosted.org/packages/db/e2/9f50fce2d8e9edd6b2d09908b6d5613f9ead992bf2e80060e080f2e7d64d/kornia_rs-0.1.10-cp314-cp314-win_amd64.whl", hash = "sha256:6de4e73b1c90cc76b7486491866eb9e61e5cf34d3a4016957d4563ac7d3ee39a", size = 2544067, upload-time = "2025-11-08T11:30:38.638Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/8f/45895818f3c7a5009737119b075db6b88bbf00938275611bc5d2cfbd0b2a/kornia_rs-0.1.10-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:f0db8b41ae03a746bb0dcb970d5ff2fd66213adb4a3b4de1186fe86205698e89", size = 2806089, upload-time = "2025-11-08T11:30:26.117Z" },
+    { url = "https://files.pythonhosted.org/packages/38/af/831e79b45702f8b6102438b1ff9b44a912669890cdf209cd275257f6d655/kornia_rs-0.1.10-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9b63ee175125892ef18027bd3a43b447fd53f9bf42cea4d6f699ab4e69cf3f16", size = 2064116, upload-time = "2025-11-08T11:30:13.481Z" },
+    { url = "https://files.pythonhosted.org/packages/53/1b/e92606e0fa9a1b52ecf57faf322dcc076ae35315b4e1870d380fd64926d7/kornia_rs-0.1.10-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68eb25ba4639fa5e1cd94a10fb6410c8840c9f0162e5912d834c4a8c7c174493", size = 2197890, upload-time = "2025-11-08T11:29:42.273Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/fa/a2adce992b5eb65ef8adfc6f4465989948bfa8b875638e17c214541af25a/kornia_rs-0.1.10-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc18ba839f5c10ceb4757342ee7530cef8a0ecdd20486b8bbe14a56f72fa7037", size = 3040852, upload-time = "2025-11-08T11:29:57.856Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/36/40a3e3a235c370f5f61a8f9a7bdedf47d1bdd8f7d7e145e551545babff6b/kornia_rs-0.1.10-cp314-cp314t-win_amd64.whl", hash = "sha256:257eb0a780f990c0c44ac47acb77504dd95b8df0c592fd31354da1228df6678d", size = 2543609, upload-time = "2025-11-08T11:30:40.1Z" },
+]
+
 [[package]]
 name = "lightning"
 version = "2.6.0"
@@ -3264,6 +3319,7 @@ dependencies = [
     { name = "ftfy" },
     { name = "importlib-resources" },
     { name = "jsonargparse" },
+    { name = "kornia" },
     { name = "lightning" },
     { name = "nncf" },
     { name = "numpy" },
@@ -3364,6 +3420,7 @@ requires-dist = [
     { name = "furo", marker = "extra == 'docs'" },
     { name = "importlib-resources", specifier = "==6.5.2" },
     { name = "jsonargparse", specifier = "==4.35.0" },
+    { name = "kornia", specifier = "~=0.8.2" },
     { name = "lightning", specifier = "==2.6.0" },
     { name = "linkify-it-py", marker = "extra == 'docs'" },
     { name = "myst-parser", marker = "extra == 'docs'", specifier = ">=2.0.0" },
@@ -6072,24 +6129,24 @@ dependencies = [
     { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(sys_platform == 'linux' and extra == 'extra-3-otx-cpu') or (sys_platform == 'win32' and extra == 'extra-3-otx-cpu') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu') or (extra == 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu')" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:59be99d1c470ef470b134468aa6afa6f968081a503acb4ee883d70332f822e35" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:aa016ab73e06a886f72edc8929ed2ed4c85aaaa6e10500ecdef921b03129b19e" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:c7eb5f219fdfaf1f65e68c00eb81172ab4fa08a9874dae9dad2bca360da34d0f" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:727334e9a721cfc1ac296ce0bf9e69d9486821bfa5b1e75a8feb6f78041db481" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c1be164e93c68b2dbf460fd58975377c892dbcf3358fb72941709c3857351bba" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:2d444009c0956669ada149f61ed78f257c1cc96d259efa6acf3929ca96ceb3f0" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:fe54cbd5942cd0b26a90f1748f0d4421caf67be35c281c6c3b8573733a03d630" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:90eec299e1f82cfaf080ccb789df3838cb9a54b57e2ebe33852cd392c692de5c" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:783c8fc580bbfc159bff52f4f72cdd538e42b32956e70dffa42b940db114e151" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:e985e12a9a232618e5a43476de5689e4b14989f5da6b93909c57afa57ec27012" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:813f0106eb3e268f3783da67b882458e544c6fb72f946e6ca64b5ed4e62c6a77" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:9212210f417888e6261c040495180f053084812cf873dedba9fc51ff4b24b2d3" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0c2d0da9bc011a0fde1d125af396a8fbe94d99becf9d313764f24ca7657a3448" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4d72a57a8f0b5146e26dac1fbfa2c905280cd04f5fcb23b9c56253506b683aeb" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp314-cp314-win_amd64.whl", hash = "sha256:499eae1e535766391b6ee2d1e6e841239c20e2e6d88203a15b8f9f8d60a1f8bd" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:7d47d544899fabac52ebe0d4812975608fd7ab79a3d7fb6383275eb667e33f53" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:9511339b3b5eb75229e0b5041202e8aed9bef3b1de3a715b9fb319c9e97688fd" },
-    { url = "https://download.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp314-cp314t-win_amd64.whl", hash = "sha256:fb9f07f6a10f0ac24ac482ae68c6df99110b74a0d80a4c64fddc9753267d8815" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:59be99d1c470ef470b134468aa6afa6f968081a503acb4ee883d70332f822e35" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:aa016ab73e06a886f72edc8929ed2ed4c85aaaa6e10500ecdef921b03129b19e" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:c7eb5f219fdfaf1f65e68c00eb81172ab4fa08a9874dae9dad2bca360da34d0f" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:727334e9a721cfc1ac296ce0bf9e69d9486821bfa5b1e75a8feb6f78041db481" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c1be164e93c68b2dbf460fd58975377c892dbcf3358fb72941709c3857351bba" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:2d444009c0956669ada149f61ed78f257c1cc96d259efa6acf3929ca96ceb3f0" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:fe54cbd5942cd0b26a90f1748f0d4421caf67be35c281c6c3b8573733a03d630" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:90eec299e1f82cfaf080ccb789df3838cb9a54b57e2ebe33852cd392c692de5c" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:783c8fc580bbfc159bff52f4f72cdd538e42b32956e70dffa42b940db114e151" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:e985e12a9a232618e5a43476de5689e4b14989f5da6b93909c57afa57ec27012" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:813f0106eb3e268f3783da67b882458e544c6fb72f946e6ca64b5ed4e62c6a77" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:9212210f417888e6261c040495180f053084812cf873dedba9fc51ff4b24b2d3" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0c2d0da9bc011a0fde1d125af396a8fbe94d99becf9d313764f24ca7657a3448" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4d72a57a8f0b5146e26dac1fbfa2c905280cd04f5fcb23b9c56253506b683aeb" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp314-cp314-win_amd64.whl", hash = "sha256:499eae1e535766391b6ee2d1e6e841239c20e2e6d88203a15b8f9f8d60a1f8bd" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:7d47d544899fabac52ebe0d4812975608fd7ab79a3d7fb6383275eb667e33f53" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:9511339b3b5eb75229e0b5041202e8aed9bef3b1de3a715b9fb319c9e97688fd" },
+    { url = "https://download-r2.pytorch.org/whl/cpu/torchvision-0.25.0%2Bcpu-cp314-cp314t-win_amd64.whl", hash = "sha256:fb9f07f6a10f0ac24ac482ae68c6df99110b74a0d80a4c64fddc9753267d8815" },
 ]
 
 [[package]]
@@ -6107,24 +6164,24 @@ dependencies = [
     { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "extra == 'extra-3-otx-cuda' or (extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu')" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5d576c65d40198627e0fad03bddeb0ef536371312f2bdfcc804c22fd28fa6018" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:ebf2b495c76097796b9a2eac9290efbcae96e0fd9e5ae52c40eff188610bb440" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp311-cp311-win_amd64.whl", hash = "sha256:af00b4e0cdb3f490f4393e9a335b622fe1b92fd5afb181033256ccba03b9637c" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8623e534ef6a815bd6407d4b52dd70c7154e2eda626ad4b9cb895d36c5a3305b" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:1255a0ca2bf987acf9f103b96c5c4cfe3415fc4a1eef17fa08af527a04a4f573" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:068e519838b4a8b32a09521244b170edd8c2ac9eeb6538b7bf492cd70e57ebf5" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:12c253520a26483fe3c614f63ff16eca6d9b0b4ebe510699b7d15d88e6c0cd35" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:a9c0de893dce9c2913c9c7ae88a916910f92d02b99da149678806d18e8079f29" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:e2e0317e3861bba1b5aeba7c1cb4bcd50937cf0bffdbea478619d1f5f73e9050" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:58b2971b55c761f1d2491bd80fcc4618ea97d363d387a9dd3aff23220cbee264" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:1b6878b043513ea3dea1b90bfb5193455d9b248b8c4d5e66ea9f5d1643a43f13" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:96cd2ba7b289117873b2a8f4c80605d38118d920b1045f3ce21a9f0ca68a701e" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e2dbf9ea9f4b2416822249e96ff3ad873d9a84e51285d6b9967732be3015c523" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:5b7ad3fb6cf03ef2a2fd617cb4b4e41efa9bb0143c67f506c2a3e6765c7b12ad" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp314-cp314-win_amd64.whl", hash = "sha256:a52ff3b072e89280f41499813e11c418d168ffc502b86cb17767bab29f432b3a" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:687987fbcb074fd7f7a61cf2b407b1eac07588ace8351a3a36978546a00adc52" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:84c5e2cb699235339b8a5c295e974a795244a45d1104ecee658d9d19600cdc75" },
-    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp314-cp314t-win_amd64.whl", hash = "sha256:d1cf27bc2da13fd9e83694ae601b1bf4135c24d9c9e9ec249056896395a78a9e" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5d576c65d40198627e0fad03bddeb0ef536371312f2bdfcc804c22fd28fa6018" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:ebf2b495c76097796b9a2eac9290efbcae96e0fd9e5ae52c40eff188610bb440" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp311-cp311-win_amd64.whl", hash = "sha256:af00b4e0cdb3f490f4393e9a335b622fe1b92fd5afb181033256ccba03b9637c" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8623e534ef6a815bd6407d4b52dd70c7154e2eda626ad4b9cb895d36c5a3305b" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:1255a0ca2bf987acf9f103b96c5c4cfe3415fc4a1eef17fa08af527a04a4f573" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:068e519838b4a8b32a09521244b170edd8c2ac9eeb6538b7bf492cd70e57ebf5" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:12c253520a26483fe3c614f63ff16eca6d9b0b4ebe510699b7d15d88e6c0cd35" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:a9c0de893dce9c2913c9c7ae88a916910f92d02b99da149678806d18e8079f29" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:e2e0317e3861bba1b5aeba7c1cb4bcd50937cf0bffdbea478619d1f5f73e9050" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:58b2971b55c761f1d2491bd80fcc4618ea97d363d387a9dd3aff23220cbee264" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:1b6878b043513ea3dea1b90bfb5193455d9b248b8c4d5e66ea9f5d1643a43f13" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:96cd2ba7b289117873b2a8f4c80605d38118d920b1045f3ce21a9f0ca68a701e" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:e2dbf9ea9f4b2416822249e96ff3ad873d9a84e51285d6b9967732be3015c523" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:5b7ad3fb6cf03ef2a2fd617cb4b4e41efa9bb0143c67f506c2a3e6765c7b12ad" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp314-cp314-win_amd64.whl", hash = "sha256:a52ff3b072e89280f41499813e11c418d168ffc502b86cb17767bab29f432b3a" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:687987fbcb074fd7f7a61cf2b407b1eac07588ace8351a3a36978546a00adc52" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:84c5e2cb699235339b8a5c295e974a795244a45d1104ecee658d9d19600cdc75" },
+    { url = "https://download-r2.pytorch.org/whl/cu128/torchvision-0.25.0%2Bcu128-cp314-cp314t-win_amd64.whl", hash = "sha256:d1cf27bc2da13fd9e83694ae601b1bf4135c24d9c9e9ec249056896395a78a9e" },
 ]
 
 [[package]]
@@ -6142,18 +6199,18 @@ dependencies = [
     { name = "torch", version = "2.10.0+xpu", source = { registry = "https://download.pytorch.org/whl/xpu" }, marker = "(extra == 'extra-3-otx-cpu' and extra == 'extra-3-otx-cuda') or (extra != 'extra-3-otx-cuda' and extra == 'extra-3-otx-xpu') or (extra != 'extra-3-otx-cpu' and extra == 'extra-3-otx-xpu')" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:bd6add201bd7628af70437292e1447abb368e0b5f4ff9abd334ae435efd44792" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp311-cp311-win_amd64.whl", hash = "sha256:36cbaedf10f6412af5c89afd9aeea474e6a56a0050348ada8fabe1ecaf6b879e" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6ad2543496bc29e59d3dd614a94d09aa9870318aedb66045344fffddfedd2cf8" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp312-cp312-win_amd64.whl", hash = "sha256:738357d97468d75fe3d510ac37e65130f2787f81d9bbc1518898f7396dc3403f" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:80269f37865fcd8b57f20e4786efae2200bfa2b2727926c3c7acc82f0e7d3548" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313-win_amd64.whl", hash = "sha256:1c4b44b36a557f7381e3076fb8843366742238648441d607c8d049c6da0f8886" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:216ad249333993ed128368f996210cc9ceae3b4d15709b25aadba844d6c6e8b7" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313t-win_amd64.whl", hash = "sha256:7a04beba6859b76e9e010f2f0eccf13ce70ff5942944a552e83844c166051515" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b09bc9ef446628e6863ca685a8c75af31cd8e958a892b6e7abd7e690452ac608" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp314-cp314-win_amd64.whl", hash = "sha256:dfe2bcac32b9cfdd1f6dd6656579f22c1f73e6433c02b91928685060d5d0290f" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0f4d53af8f3e84e42ea7048cdff410a0b97eabb8cdc4c2e8c86a9acdaa69ffea" },
-    { url = "https://download.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp314-cp314t-win_amd64.whl", hash = "sha256:fcaa0c1d268f741adedd28be4825b237a67c4bc1ef62f60ef497e806f5542b19" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:bd6add201bd7628af70437292e1447abb368e0b5f4ff9abd334ae435efd44792" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp311-cp311-win_amd64.whl", hash = "sha256:36cbaedf10f6412af5c89afd9aeea474e6a56a0050348ada8fabe1ecaf6b879e" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6ad2543496bc29e59d3dd614a94d09aa9870318aedb66045344fffddfedd2cf8" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp312-cp312-win_amd64.whl", hash = "sha256:738357d97468d75fe3d510ac37e65130f2787f81d9bbc1518898f7396dc3403f" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:80269f37865fcd8b57f20e4786efae2200bfa2b2727926c3c7acc82f0e7d3548" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313-win_amd64.whl", hash = "sha256:1c4b44b36a557f7381e3076fb8843366742238648441d607c8d049c6da0f8886" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:216ad249333993ed128368f996210cc9ceae3b4d15709b25aadba844d6c6e8b7" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp313-cp313t-win_amd64.whl", hash = "sha256:7a04beba6859b76e9e010f2f0eccf13ce70ff5942944a552e83844c166051515" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b09bc9ef446628e6863ca685a8c75af31cd8e958a892b6e7abd7e690452ac608" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp314-cp314-win_amd64.whl", hash = "sha256:dfe2bcac32b9cfdd1f6dd6656579f22c1f73e6433c02b91928685060d5d0290f" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:0f4d53af8f3e84e42ea7048cdff410a0b97eabb8cdc4c2e8c86a9acdaa69ffea" },
+    { url = "https://download-r2.pytorch.org/whl/xpu/torchvision-0.25.0%2Bxpu-cp314-cp314t-win_amd64.whl", hash = "sha256:fcaa0c1d268f741adedd28be4825b237a67c4bc1ef62f60ef497e806f5542b19" },
 ]
 
 [[package]]