11 changes: 5 additions & 6 deletions .github/scripts/microbench_summary.py
@@ -14,7 +14,6 @@
import argparse
import bisect
from pathlib import Path
-from typing import Dict, List

def main():
parser = argparse.ArgumentParser(
@@ -214,7 +213,7 @@ def get_op_pattern(base_op_name: str, get_backward: bool) -> tuple:
else:
return (base_op_name, f"{base_op_name} ")

-def process_l1_loss(content: str, case_name: str, data: List, columns: List):
+def process_l1_loss(content: str, case_name: str, data: list, columns: list):
shape_matches = list(re.finditer(r"(shape\s*[:=].*?)(?=\n\S|$)", content))
shape_lines = [match.group(0) for match in shape_matches]
shape_positions = [match.start() for match in shape_matches]
@@ -281,7 +280,7 @@ def process_l1_loss(content: str, case_name: str, data: List, columns: List):

data.append([record.get(col, "") for col in columns])

-def extract_times(content: str, pattern: str, get_backward: bool) -> List:
+def extract_times(content: str, pattern: str, get_backward: bool) -> list:
lines = content.split('\n')
results = []
for line in lines:
@@ -297,8 +296,8 @@ def extract_times(content: str, pattern: str, get_backward: bool) -> List:

return results

-def create_record(params: Dict, case_name: str, op_name: str,
-backward: str, time_us: float) -> Dict:
+def create_record(params: dict, case_name: str, op_name: str,
+backward: str, time_us: float) -> dict:
return {
"P": params.get("p", ""),
**params,
@@ -316,7 +315,7 @@ def convert_to_us(value: float, unit: str) -> float:
return value * 1_000_000
return value

-def extract_params(text: str) -> Dict:
+def extract_params(text: str) -> dict:
params = {}
pairs = re.split(r'[;]', text.strip())

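Note on the hunks above: they drop `typing.Dict`/`typing.List` in favor of the builtin generics that Python 3.9+ can subscript directly (PEP 585). A minimal sketch of the syntax, using a hypothetical `summarize` helper rather than anything from this script:

```python
# PEP 585: builtin containers are subscriptable on Python 3.9+, so
# `from typing import Dict, List` is no longer needed for annotations.
def summarize(times_us: list[float]) -> dict[str, float]:
    # Reduce a list of microsecond timings to a few summary statistics.
    return {
        "min": min(times_us),
        "max": max(times_us),
        "avg": sum(times_us) / len(times_us),
    }


print(summarize([12.5, 13.1, 12.9]))
```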
2 changes: 1 addition & 1 deletion .github/scripts/op_perf_comparison.py
@@ -26,7 +26,7 @@ def preprocess_row(row):
def display_row(record):
formatted = {}
for key, value in record.items():
-if isinstance(value, (list, tuple, dict)):
+if isinstance(value, list | tuple | dict):
formatted[key] = str(value)
elif value == "NULL":
formatted[key] = "NULL"
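The new `isinstance(value, list | tuple | dict)` form relies on PEP 604 unions, which `isinstance` accepts from Python 3.10 onward; it is equivalent to the old tuple-of-types form. A quick illustration:

```python
# On Python 3.10+ isinstance() accepts X | Y unions (PEP 604) as well as the
# classic tuple of types; the two checks below are interchangeable.
value = {"a": 1}
assert isinstance(value, (list, tuple, dict))
assert isinstance(value, list | tuple | dict)
```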
6 changes: 3 additions & 3 deletions .lintrunner.toml
@@ -34,7 +34,7 @@ init_command = [
'mccabe==0.7.0',
'pycodestyle==2.11.1',
'pyflakes==3.1.0',
-'torchfix==0.4.0 ; python_version >= "3.9" and python_version < "3.13"',
+'torchfix==0.4.0 ; python_version < "3.13"',
]


@@ -83,11 +83,11 @@ init_command = [
'python3',
'tools/linter/adapters/pip_init.py',
'--dry-run={{DRYRUN}}',
-'numpy==1.26.4 ; python_version >= "3.9" and python_version <= "3.11"',
+'numpy==1.26.4 ; python_version <= "3.11"',
'numpy==2.1.0 ; python_version >= "3.12"',
'expecttest==0.3.0',
'mypy==1.13.0',
-'sympy==1.13.0 ; python_version >= "3.9"',
+'sympy==1.13.0',
'types-requests==2.27.25',
'types-PyYAML==6.0.7',
'types-tabulate==0.8.8',
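The requirement strings above use PEP 508 environment markers; with the minimum supported Python raised, the `python_version >= "3.9"` clauses become redundant and only the upper bounds remain. A small sketch of how such a marker evaluates, assuming the third-party `packaging` library (not something this lint config installs itself):

```python
# Evaluate a PEP 508 environment marker against the running interpreter.
# Purely illustrative; requires `pip install packaging`.
from packaging.markers import Marker

marker = Marker('python_version < "3.13"')
print(marker.evaluate())  # True on any interpreter older than Python 3.13
```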
2 changes: 1 addition & 1 deletion mypy-strict.ini
@@ -6,7 +6,7 @@
# files.

[mypy]
-python_version = 3.8
+python_version = 3.10
plugins = mypy_plugins/check_mypy_version.py, numpy.typing.mypy_plugin

cache_dir = .mypy_cache/strict
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -17,11 +17,11 @@ build-backend = "setuptools.build_meta:__legacy__"
[tool.black]
# Uncomment if pyproject.toml worked fine to ensure consistency with flake8
# line-length = 120
target-version = ["py38", "py39", "py310", "py311"]
target-version = ["py310", "py311", "py312"]


[tool.ruff]
target-version = "py38"
target-version = "py310"
line-length = 120

[tool.ruff.lint]
2 changes: 1 addition & 1 deletion test/microbench/upsample_bicubic2d.py
@@ -80,6 +80,6 @@ def simple_test(in_shape, scale_factor, backward, dtype):
[128, 128, 5, 5],
]
scale_factor = [[3, 3], [3, 3], [7, 7], [7, 7]]
-for sp, sf in zip(shape_list, scale_factor):
+for sp, sf in zip(shape_list, scale_factor, strict=False):
for dtype in [torch.bfloat16, torch.float16, torch.float32]:
simple_test(sp, sf, backward, dtype)
2 changes: 1 addition & 1 deletion test/microbench/upsample_bilinear2d.py
@@ -81,7 +81,7 @@ def simple_test(in_shape, scale_factor, backward, dtype, mode):
[8, 32, 256, 256],
]
scale_factor = [[3, 3], [3, 3], [7, 7], [7, 7], 3]
-for sp, sf in zip(shape_list, scale_factor):
+for sp, sf in zip(shape_list, scale_factor, strict=False):
for dtype in [torch.bfloat16, torch.float16, torch.float32]:
for mode in ["bilinear"]:
simple_test(sp, sf, backward, dtype, mode)
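The `strict=False` arguments added in this and the following test files keep `zip`'s default truncating behavior while making the choice explicit, as required by lint checks for the `strict=` flag introduced in Python 3.10. A short illustration of the two modes:

```python
# zip() silently truncates to the shortest input by default; strict=False keeps
# that behavior but states it explicitly, while strict=True raises on mismatch.
print(list(zip([1, 2, 3], ["a", "b"], strict=False)))  # [(1, 'a'), (2, 'b')]

try:
    list(zip([1, 2, 3], ["a", "b"], strict=True))
except ValueError as exc:
    print(exc)  # e.g. "zip() argument 2 is shorter than argument 1"
```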
4 changes: 2 additions & 2 deletions test/regressions/test_deform_conv.py
@@ -53,8 +53,8 @@ def bilinear_interpolate(data, y, x, snap_border=False):
wx_l = 1 - wx_h

val = 0
-for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
-for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
+for wx, xp in zip((wx_l, wx_h), (x_low, x_high), strict=False):
+for wy, yp in zip((wy_l, wy_h), (y_low, y_high), strict=False):
if 0 <= yp < height and 0 <= xp < width:
val += wx * wy * data[yp, xp]
return val
12 changes: 8 additions & 4 deletions test/regressions/test_roi_align.py
@@ -27,8 +27,8 @@ def bilinear_interpolate(data, y, x, snap_border=False):
wy_l = 1 - wy_h
wx_l = 1 - wx_h
val = 0
-for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
-for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
+for wx, xp in zip((wx_l, wx_h), (x_low, x_high), strict=False):
+for wy, yp in zip((wy_l, wy_h), (y_low, y_high), strict=False):
if 0 <= yp < height and 0 <= xp < width:
val += wx * wy * data[yp, xp]
return val
@@ -130,8 +130,12 @@ def expected_grad_fn(
wx_h = x - x_low
wy_l = 1 - wy_h
wx_l = 1 - wx_h
-for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
-for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
+for wx, xp in zip(
+(wx_l, wx_h), (x_low, x_high), strict=False
+):
+for wy, yp in zip(
+(wy_l, wy_h), (y_low, y_high), strict=False
+):
if 0 <= yp < in_data.size(
2
) and 0 <= xp < in_data.size(3):
4 changes: 2 additions & 2 deletions test/regressions/test_torchvision_roi_ops.py
@@ -358,8 +358,8 @@ def bilinear_interpolate(data, y, x, snap_border=False):
wx_l = 1 - wx_h

val = 0
-for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
-for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
+for wx, xp in zip((wx_l, wx_h), (x_low, x_high), strict=False):
+for wy, yp in zip((wy_l, wy_h), (y_low, y_high), strict=False):
if 0 <= yp < height and 0 <= xp < width:
val += wx * wy * data[yp, xp]
return val
7 changes: 5 additions & 2 deletions test/xpu/distributed/test_c10d_ops_xccl.py
@@ -190,6 +190,7 @@ def allreduce(tensors, op):
for op, err in zip(
(c10d.ReduceOp.BAND, c10d.ReduceOp.BOR, c10d.ReduceOp.BXOR),
("ReduceOp.BAND", "ReduceOp.BOR", "ReduceOp.BXOR"),
+strict=False,
):
with self.assertRaisesRegex(ValueError, "Cannot use " + err + " with XCCL"):
allreduce(tensors, op)
@@ -254,6 +255,7 @@ def reduce(xs, rootRank, rootTensor, op=None):
for op, err in zip(
(c10d.ReduceOp.BAND, c10d.ReduceOp.BOR, c10d.ReduceOp.BXOR),
("ReduceOp.BAND", "ReduceOp.BOR", "ReduceOp.BXOR"),
+strict=False,
):
with self.assertRaisesRegex(
ValueError, "Cannot use " + err + " with XCCL"
@@ -905,7 +907,7 @@ def test_all_to_all(self, dtype=torch.float):
expected_tensors = [t.to(device) for t in expected_tensors]
out_tensors = [t.to(device) for t in out_tensors]
dist.all_to_all(out_tensors, in_tensors)
-for t1, t2 in zip(out_tensors, expected_tensors):
+for t1, t2 in zip(out_tensors, expected_tensors, strict=False):
self.assertEqual(t1, t2)

@requires_xccl()
@@ -918,7 +920,8 @@ def test_all_to_all_single_none(self):
out = torch.zeros(self.world_size, 2, dtype=send.dtype).to(device)
dist.all_to_all_single(out, send)
self.assertEqual(
-out.tolist(), list(zip(range(self.world_size), range(self.world_size)))
+out.tolist(),
+list(zip(range(self.world_size), range(self.world_size), strict=False)),
)


2 changes: 1 addition & 1 deletion test/xpu/distributed/test_c10d_xccl.py
@@ -1113,7 +1113,7 @@ def test_batched_send_recv(self, op_sizes_per_coalesce, timing_enabled):
first_op = seq * (ops_per_coalesce)
coalesced_op = first_op + ops_per_coalesce
for p2p_op_idx, input_sizes in zip(
-range(first_op, coalesced_op, 1), op_sizes_per_coalesce
+range(first_op, coalesced_op, 1), op_sizes_per_coalesce, strict=False
):
# the indivudal ops inside the coalescing group the individual op metadata,
# but not the timing info coming from the actual coalesced kernel
45 changes: 33 additions & 12 deletions test/xpu/test_modules_xpu.py
@@ -64,7 +64,8 @@ def fn_to_gradcheck(*flat_input_and_params):
new_input_args = input_and_params[: len(input_args)]
kwarg_args = input_and_params[-len(kwarg_tensors) :]
new_kwargs = {
-name: obj for (name, _), obj in zip(kwarg_tensors, kwarg_args)
+name: obj
+for (name, _), obj in zip(kwarg_tensors, kwarg_args, strict=False)
}

with freeze_rng_state():
@@ -89,7 +90,7 @@ def fn_to_gradcheck(*flat_input_and_params):
for _, obj in kwarg_tensors:
obj.requires_grad = False

-for p, old in zip(params, old_params_requires_grad):
+for p, old in zip(params, old_params_requires_grad, strict=False):
p.requires_grad = old
grad_input = input_args + params + tuple(obj for (_, obj) in kwarg_tensors)
flat_input, flat_spec = torch.utils._pytree.tree_flatten(grad_input)
@@ -98,7 +99,7 @@
)
p.requires_grad = False

-for (_, obj), old in zip(kwarg_tensors, old_kwargs_requires_grad):
+for (_, obj), old in zip(kwarg_tensors, old_kwargs_requires_grad, strict=False):
obj.requires_grad = old
grad_input = input_args + params + tuple(obj for (_, obj) in kwarg_tensors)
flat_input, flat_spec = torch.utils._pytree.tree_flatten(grad_input)
@@ -198,29 +199,49 @@ def _to(m, set_grad=False):

if swap:
# id same, ._cdata differs --> swapped cdata of THPVariable
-self.assertTrue(all(a == b for a, b in zip(p_ids_before, p_ids_after)))
self.assertTrue(
-all(a != b for a, b in zip(p_cdatas_before, p_cdatas_after))
+all(a == b for a, b in zip(p_ids_before, p_ids_after, strict=False))
)
+self.assertTrue(
+all(
+a != b
+for a, b in zip(p_cdatas_before, p_cdatas_after, strict=False)
+)
+)
if set_grad:
self.assertTrue(
all(
a == b if g_no_swap else a != b
-for a, b in zip(g_cdatas_before, g_cdatas_after)
+for a, b in zip(
+g_cdatas_before, g_cdatas_after, strict=False
+)
)
)
else:
# id and _cdata remain the same --> .data setting
self.assertTrue(
-all(a == b for a, b in zip(p_cdatas_before, p_cdatas_after))
+all(
+a == b
+for a, b in zip(p_cdatas_before, p_cdatas_after, strict=False)
+)
)
+self.assertTrue(
+all(a == b for a, b in zip(p_ids_before, p_ids_after, strict=False))
+)
-self.assertTrue(all(a == b for a, b in zip(p_ids_before, p_ids_after)))
if set_grad:
self.assertTrue(
-all(a == b for a, b in zip(g_cdatas_before, g_cdatas_after))
+all(
+a == b
+for a, b in zip(
+g_cdatas_before, g_cdatas_after, strict=False
+)
+)
)
self.assertTrue(
-all(a == b for a, b in zip(g_ids_before, g_ids_after))
+all(
+a == b
+for a, b in zip(g_ids_before, g_ids_after, strict=False)
+)
)


@@ -234,7 +255,7 @@ def _test_multiple_device_transfer(self, device, dtype, module_info, training):
module_info, device="cpu", dtype=dtype, requires_grad=False, training=training
)
for module_input_device, module_input_cpu in zip(
-module_inputs_device, module_inputs_cpu
+module_inputs_device, module_inputs_cpu, strict=False
):
if module_input_device.forward_input is None:
continue
@@ -270,7 +291,7 @@ def _test_multiple_device_transfer(self, device, dtype, module_info, training):
if torch.cuda.device_count() >= 2:
# === test cross-GPU transfer works
def _to_device1(objs):
-if isinstance(objs, (tuple, list)):
+if isinstance(objs, tuple | list):
return type(objs)(_to_device1(item) for item in objs)
elif isinstance(objs, dict):
return {name: _to_device1(item) for name, item in objs.items()}
8 changes: 4 additions & 4 deletions test/xpu/test_nestedtensor_xpu.py
@@ -121,7 +121,7 @@ def _test_copy_(self):
nt_copy = torch.empty_like(nt)
nt_copy.copy_(nt)

-for nt_ub, nt_copy_ub in zip(nt.unbind(), nt_copy):
+for nt_ub, nt_copy_ub in zip(nt.unbind(), nt_copy, strict=False):
self.assertEqual(nt_ub, nt_copy_ub)

nt_error = torch.nested.nested_tensor([torch.tensor([0, 0])])
@@ -136,12 +136,12 @@ def _test_copy_(self):
nt_copy = torch.empty_like(nt, device=torch.device("cpu"))
nt_copy.copy_(nt, non_blocking=True)
torch.xpu.current_stream(torch.xpu.current_device()).synchronize()
-for nt_ub, nt_copy_ub in zip(nt.unbind(), nt_copy):
+for nt_ub, nt_copy_ub in zip(nt.unbind(), nt_copy, strict=False):
self.assertEqual(nt_ub, nt_copy_ub)

nt_copy = torch.empty_like(nt, device=torch.device("cpu"))
nt_copy.copy_(nt, non_blocking=False)
-for nt_ub, nt_copy_ub in zip(nt.unbind(), nt_copy):
+for nt_ub, nt_copy_ub in zip(nt.unbind(), nt_copy, strict=False):
self.assertEqual(nt_ub, nt_copy_ub)

@skipMeta
@@ -608,7 +608,7 @@ def check_forward_backward():

nt_grads = torch.autograd.grad(attn_nt.values().sum(), (q_nt, k_nt, v_nt))
for nt_grad, d1_grad, d2_grad, grad_atol, grad_rtol in zip(
-nt_grads, d1_grads, d2_grads, grad_atols, grad_rtols
+nt_grads, d1_grads, d2_grads, grad_atols, grad_rtols, strict=False
):
unbound_nt_grads = nt_grad.unbind()
self.assertEqual(
2 changes: 1 addition & 1 deletion test/xpu/test_nn_xpu.py
@@ -2426,7 +2426,7 @@ def _test_linear_autograd(self, device, bias, weight_layout):

self.assertEqual(grads_expected[0].layout, weight_layout)

-for g, ge in zip(grads, grads_expected):
+for g, ge in zip(grads, grads_expected, strict=False):
self.assertEqual(g, ge)


4 changes: 3 additions & 1 deletion test/xpu/test_optim_xpu.py
@@ -235,7 +235,9 @@ def closure():
fused = state_dict_cpu["param_groups"][0].get("fused", False)
new_state_dict = optimizer_cuda.state_dict()
for state_cpu, state_cuda in zip(
state_dict_cpu["state"].values(), new_state_dict["state"].values()
state_dict_cpu["state"].values(),
new_state_dict["state"].values(),
strict=False,
):
if "step" in state_cpu and torch.is_tensor(state_cpu["step"]):
self.assertEqual(
6 changes: 3 additions & 3 deletions test/xpu/test_sort_and_select_xpu.py
@@ -50,7 +50,7 @@ def generate_samples():

def repeated_index_fill(t, dim, idxs, vals):
res = t
-for idx, val in zip(idxs, vals):
+for idx, val in zip(idxs, vals, strict=False):
res = res.index_fill(dim, idx, val)
return res

@@ -70,11 +70,11 @@ def repeated_index_fill(t, dim, idxs, vals):
)
vals = (inf, neg_inf, nan)
subsets = chain.from_iterable(
-combinations(list(zip(idxs, vals)), r)
+combinations(list(zip(idxs, vals, strict=False)), r)
for r in range(1, n_fill_vals + 1)
)
for subset in subsets:
-idxs_subset, vals_subset = zip(*subset)
+idxs_subset, vals_subset = zip(*subset, strict=False)
yield (
repeated_index_fill(x, dim, idxs_subset, vals_subset),
dim,