Merged · Changes from 3 commits
4 changes: 2 additions & 2 deletions docs/python/examples/plot_train_convert_predict.py
@@ -212,9 +212,9 @@ def sess_predict_proba_rf(x):
rf.fit(X_train, y_train)
initial_type = [("float_input", FloatTensorType([1, 4]))]
onx = convert_sklearn(rf, initial_types=initial_type)
with open("rf_iris_%d.onnx" % n_trees, "wb") as f:
with open(f"rf_iris_{n_trees}.onnx", "wb") as f:
f.write(onx.SerializeToString())
sess = rt.InferenceSession("rf_iris_%d.onnx" % n_trees, providers=rt.get_available_providers())
sess = rt.InferenceSession(f"rf_iris_{n_trees}.onnx", providers=rt.get_available_providers())

def sess_predict_proba_loop(x):
return sess.run([prob_name], {input_name: x.astype(numpy.float32)})[0] # noqa: B023
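The change above swaps printf-style `%` interpolation for f-strings (PEP 498): the value is embedded at the point of use, which reads more directly and avoids type-code mismatches like `%d` vs `%s`. A minimal sketch of the equivalence (the `n_trees` value here is made up):

    n_trees = 100
    old_style = "rf_iris_%d.onnx" % n_trees   # printf-style
    new_style = f"rf_iris_{n_trees}.onnx"     # f-string, evaluated inline
    assert old_style == new_style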
2 changes: 1 addition & 1 deletion onnxruntime/python/tools/quantization/calibrate.py
@@ -161,7 +161,7 @@ class CalibrationMethod(Enum):
class CalibrationDataReader(metaclass=abc.ABCMeta):
@classmethod
def __subclasshook__(cls, subclass):
- return hasattr(subclass, "get_next") and callable(subclass.get_next) or NotImplemented
+ return (hasattr(subclass, "get_next") and callable(subclass.get_next)) or NotImplemented

@abc.abstractmethod
def get_next(self) -> dict:
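The added parentheses do not change behavior — `and` already binds tighter than `or`, so both forms parse as `(A and B) or C` — they just make the fallback to `NotImplemented` explicit. A quick demonstration:

    # `A and B or C` always parses as `(A and B) or C`.
    class Duck:
        def get_next(self):
            return {}

    result = (hasattr(Duck, "get_next") and callable(Duck.get_next)) or NotImplemented
    assert result is True  # the `and` clause is truthy, so NotImplemented is never reached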
6 changes: 1 addition & 5 deletions onnxruntime/python/tools/quantization/quant_utils.py
@@ -907,11 +907,7 @@ def smooth_distribution(p, eps=0.0001):
# raise ValueError('The discrete probability distribution is malformed. All entries are 0.')
return None
eps1 = eps * float(n_zeros) / float(n_nonzeros)
- assert eps1 < 1.0, "n_zeros=%d, n_nonzeros=%d, eps1=%f" % (
-     n_zeros,
-     n_nonzeros,
-     eps1,
- )
+ assert eps1 < 1.0, f"n_zeros={n_zeros}, n_nonzeros={n_nonzeros}, eps1={eps1}"

hist = p.astype(numpy.float32)
hist += eps * is_zeros + (-eps1) * is_nonzeros
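Worth noting for the assert rewrite: the message of `assert cond, msg` is evaluated only when the condition fails, so the f-string adds no formatting cost on the passing path. A quick check:

    calls = []

    def expensive_detail():
        calls.append(1)
        return "details"

    assert True, f"never formatted: {expensive_detail()}"
    assert not calls  # the message expression was never evaluated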
@@ -44,9 +44,8 @@ def main():
cmake_tar = "cmake-3.28.3-linux-x86_64.tar.gz"
if not os.path.exists(cmake_tar):
subprocess.run(["wget", "-c", "https://cmake.org/files/v3.28/" + cmake_tar], check=True)
- tar = tarfile.open(cmake_tar)
- tar.extractall()
- tar.close()
+ with tarfile.open(cmake_tar) as tar:
+     tar.extractall()

os.environ["PATH"] = os.path.join(os.path.abspath("cmake-3.28.3-linux-x86_64"), "bin") + ":" + os.environ["PATH"]
os.environ["CUDACXX"] = os.path.join(args.cuda_home, "bin", "nvcc")
@@ -17,11 +17,10 @@
def extract_and_get_files(file_name):
model_folder = file_name.replace(".tar.gz", "") + "/"
create_model_folder(model_folder)
- model_tar = tarfile.open(file_name)
- model_tar.extractall(model_folder)
- file_list = model_tar.getnames()
- file_list.sort()
- model_tar.close()
+ with tarfile.open(file_name) as model_tar:
+     model_tar.extractall(model_folder)
+     file_list = model_tar.getnames()
+     file_list.sort()
return model_folder, file_list


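Both tarfile hunks apply the same fix: `tarfile.open` returns a context manager, so a `with` block closes the archive even when `extractall` or `getnames` raises, where the old code leaked the handle on error. A minimal sketch (paths are hypothetical; on Python 3.12+ one can additionally pass `filter="data"` to `extractall` to reject unsafe members such as absolute paths):

    import tarfile

    def extract_sorted(archive_path: str, dest: str) -> list[str]:
        # Closed on normal exit and on exception alike.
        with tarfile.open(archive_path) as tar:
            tar.extractall(dest)  # consider filter="data" on Python 3.12+
            return sorted(tar.getnames())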
2 changes: 1 addition & 1 deletion onnxruntime/test/python/onnxruntime_test_python.py
@@ -85,7 +85,7 @@ def cuda_device_count(self, cuda_lib):
if result != 0:
error_str = ctypes.c_char_p()
cuda_lib.cuGetErrorString(result, ctypes.byref(error_str))
print("cuDeviceGetCount failed with error code %d: %s" % (result, error_str.value.decode()))
print(f"cuDeviceGetCount failed with error code {result}: {error_str.value.decode()}")
return -1
return num_device.value

3 changes: 1 addition & 2 deletions onnxruntime/test/python/onnxruntime_test_python_iobinding.py
@@ -221,7 +221,7 @@ def test_bind_onnx_types_not_supported_by_numpy(self):
)

for inner_device, provider in devices:
- for onnx_dtype in onnx_to_torch_type_map:
+ for onnx_dtype, torch_dtype in onnx_to_torch_type_map.items():
with self.subTest(onnx_dtype=onnx_dtype, inner_device=str(inner_device)):

# Create onnx graph with dynamic axes
@@ -239,7 +239,6 @@

sess = onnxrt.InferenceSession(model_def.SerializeToString(), providers=provider)

- torch_dtype = onnx_to_torch_type_map[onnx_dtype]
x = torch.arange(8).to(torch_dtype)
y = torch.empty(8, dtype=torch_dtype)

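Iterating `dict.items()` yields each key/value pair directly, so the loop body no longer needs the `onnx_to_torch_type_map[onnx_dtype]` lookup that the deleted line performed. A small illustration (the mapping is made up):

    onnx_to_torch = {"tensor(float)": "float32", "tensor(int64)": "int64"}

    for onnx_dtype, torch_dtype in onnx_to_torch.items():
        print(onnx_dtype, "->", torch_dtype)

    # Pitfall: .values() yields values only, so `for k, v in d.values()`
    # tries to unpack each value itself and misbehaves or raises.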
4 changes: 2 additions & 2 deletions onnxruntime/test/python/onnxruntime_test_scatternd.py
@@ -88,8 +88,8 @@ def common_scatter(self, opset, providers, dtype, reduction, expected_names):
self.assertEqual(expected_names, names)

sonx = str(onx).replace(" ", "").replace("\n", "|")
- sexp = 'op_type:"Cast"|attribute{|name:"to"|type:INT|i:%d|}' % itype
- sexp2 = 'op_type:"Cast"|attribute{|name:"to"|i:%d|type:INT|}' % itype
+ sexp = 'op_type:"Cast"|attribute{|name:"to"|type:INT|i:%d|}' % itype  # noqa: UP031
+ sexp2 = 'op_type:"Cast"|attribute{|name:"to"|i:%d|type:INT|}' % itype  # noqa: UP031
assert sexp in sonx or sexp2 in sonx, f"Unable to find a substring in {sonx!r}"
if providers == ["CPUExecutionProvider"]:
return
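The `# noqa: UP031` markers keep printf-style formatting here, presumably because the expected strings contain literal `{` and `}` from the protobuf text format, which an f-string would have to escape as `{{` and `}}`, making the template harder to compare against the target text. A side-by-side sketch:

    itype = 1
    percent = 'attribute{|name:"to"|type:INT|i:%d|}' % itype
    fstring = f'attribute{{|name:"to"|type:INT|i:{itype}|}}'
    assert percent == fstring  # same output; the % form mirrors the target text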
4 changes: 2 additions & 2 deletions onnxruntime/test/python/quantization/op_test_utils.py
@@ -379,10 +379,10 @@ def check_op_type_count(testcase, model_path, **kwargs):
if node.op_type in optype2count:
optype2count[node.op_type] += 1

- for op_type in kwargs:
+ for op_type, value in kwargs.items():
try:
testcase.assertEqual(
- kwargs[op_type],
+ value,
optype2count[op_type],
f"op_type {op_type} count not same",
)
8 changes: 4 additions & 4 deletions onnxruntime/test/python/quantization/test_calibration.py
@@ -361,8 +361,8 @@ def test_compute_data(self):
min_max_pairs = list(zip(rmin, rmax))
output_names = [infer_session.get_outputs()[i].name for i in range(len(infer_session.get_outputs()))]
output_min_max_dict = dict(zip(output_names, min_max_pairs))
- for output_name in output_min_max_dict:
-     self.assertEqual(output_min_max_dict[output_name], tensors_range[output_name].range_value)
+ for output_name, min_max in output_min_max_dict.items():
+     self.assertEqual(min_max, tensors_range[output_name].range_value)

def test_histogram_calibrators_run(self):
"""
@@ -524,8 +524,8 @@ def test_compute_data_per_channel(self):
min_max_pairs = list(zip(rmin, rmax))
output_names = [infer_session.get_outputs()[i].name for i in range(len(infer_session.get_outputs()))]
output_min_max_dict = dict(zip(output_names, min_max_pairs))
- for output_name in output_min_max_dict:
-     np.testing.assert_equal(output_min_max_dict[output_name], tensors_range[output_name].range_value)
+ for output_name, min_max in output_min_max_dict.items():
+     np.testing.assert_equal(min_max, tensors_range[output_name].range_value)


if __name__ == "__main__":
@@ -291,9 +291,9 @@ def resize_model(self):
reshapes[initializer.name] = new_shape
print("initializer", initializer.name, tensor.shape, "=>", new_shape)

- for initializer_name in reshapes:
+ for initializer_name, new_shape in reshapes.items():
      self.replace_input_of_all_nodes(initializer_name, initializer_name + "_resize")
-     tensor = self.resize_weight(initializer_name, reshapes[initializer_name])
+     tensor = self.resize_weight(initializer_name, new_shape)
self.model.graph.initializer.extend([tensor])

self.use_dynamic_axes()
@@ -331,9 +331,9 @@ def resize_model(self):
reshapes[initializer.name] = new_shape
print("initializer", initializer.name, tensor.shape, "=>", new_shape)

- for initializer_name in reshapes:
+ for initializer_name, new_shape in reshapes.items():
      self.replace_input_of_all_nodes(initializer_name, initializer_name + "_resize")
-     tensor = self.resize_weight(initializer_name, reshapes[initializer_name])
+     tensor = self.resize_weight(initializer_name, new_shape)
self.model.graph.initializer.extend([tensor])

# Add node name, replace split node attribute.
@@ -60,7 +60,7 @@ def main():

import tempfile

- fp = tempfile.NamedTemporaryFile()
+ fp = tempfile.NamedTemporaryFile()  # noqa: SIM115

adamw_optimizer = torch.optim.AdamW(pt_model.parameters(), lr=1e-3)
scheduler = WarmupLinearSchedule(adamw_optimizer, num_warmup_steps, num_training_steps)
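`# noqa: SIM115` suppresses ruff's open-file-without-context-manager rule. A `with` block would be wrong here: a `NamedTemporaryFile` is deleted as soon as it is closed (with the default `delete=True`), and the handle has to stay alive past this statement. A sketch of the behavior (names are hypothetical):

    import tempfile

    fp = tempfile.NamedTemporaryFile(mode="w+")  # would trip SIM115 without the noqa
    fp.write("scratch data")
    fp.flush()
    print(fp.name)  # path stays valid only while fp remains open
    fp.close()      # the file is removed here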
2 changes: 1 addition & 1 deletion orttraining/orttraining/python/training/__init__.py
@@ -15,9 +15,9 @@
__all__ = [
"PropagateCastOpsStrategy",
"TrainingParameters",
"is_ortmodule_available",
"amp",
"artifacts",
"is_ortmodule_available",
"optim",
]

4 changes: 2 additions & 2 deletions orttraining/orttraining/python/training/_utils.py
@@ -175,8 +175,8 @@ def static_vars(**kwargs):
"""

def decorate(func):
- for k in kwargs:
-     setattr(func, k, kwargs[k])
+ for k, v in kwargs.items():
+     setattr(func, k, v)
return func

return decorate
10 changes: 5 additions & 5 deletions orttraining/orttraining/python/training/onnxblock/__init__.py
@@ -12,15 +12,15 @@
from onnxruntime.training.onnxblock.onnxblock import ForwardBlock, TrainingBlock

__all__ = [
"blocks",
"loss",
"optim",
"Block",
"ForwardBlock",
"TrainingBlock",
"load_checkpoint_to_model",
"save_checkpoint",
"base",
"blocks",
"custom_op_library",
"empty_base",
"load_checkpoint_to_model",
"loss",
"optim",
"save_checkpoint",
]
@@ -3,4 +3,4 @@

from onnxruntime.training.onnxblock.optim.optim import SGD, AdamW, ClipGradNorm

__all__ = ["AdamW", "ClipGradNorm", "SGD"]
__all__ = ["SGD", "AdamW", "ClipGradNorm"]
@@ -18,8 +18,8 @@
class LegacyMegatronLMModifier(FP16OptimizerModifier):
def __init__(self, optimizer, **kwargs) -> None:
super().__init__(optimizer)
- self.get_horizontal_model_parallel_rank = kwargs.get("get_horizontal_model_parallel_rank", None)
- self.get_horizontal_model_parallel_group = kwargs.get("get_horizontal_model_parallel_group", None)
+ self.get_horizontal_model_parallel_rank = kwargs.get("get_horizontal_model_parallel_rank")
+ self.get_horizontal_model_parallel_group = kwargs.get("get_horizontal_model_parallel_group")

def can_be_modified(self):
return self.check_requirements(
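`dict.get(key)` already returns `None` for a missing key, so the explicit `None` default was redundant:

    kwargs = {}
    assert kwargs.get("get_horizontal_model_parallel_rank") is None        # implicit default
    assert kwargs.get("get_horizontal_model_parallel_rank", None) is None  # same, just noisier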
@@ -229,7 +229,7 @@ def find_memory_optimization_opportunity(self, execution_agent: TrainingAgent, r

apply_config.append(",".join(recompute_configs))

- self._json_file_for_layerwise_recompute = tempfile.NamedTemporaryFile(mode="w+")
+ self._json_file_for_layerwise_recompute = tempfile.NamedTemporaryFile(mode="w+")  # noqa: SIM115
json.dump(apply_config, self._json_file_for_layerwise_recompute)
self._json_file_for_layerwise_recompute.flush()
runtime_options.memory_optimizer_config_file_path = self._json_file_for_layerwise_recompute.name
16 changes: 8 additions & 8 deletions orttraining/orttraining/python/training/utils/__init__.py
@@ -24,17 +24,17 @@
)

__all__ = [
"PrimitiveType",
"ORTModelInputOutputType",
"ORTModelInputOutputSchemaType",
"ORTModelInputOutputType",
"PTable",
"PrimitiveType",
"extract_data_and_schema",
"unflatten_data_using_schema",
"torch_nvtx_range_push",
"torch_nvtx_range_pop",
"nvtx_function_decorator",
"log_memory_usage",
"pytorch_type_to_onnx_dtype",
"nvtx_function_decorator",
"onnx_dtype_to_pytorch_dtype",
"pytorch_scalar_type_to_pytorch_dtype",
"PTable",
"pytorch_type_to_onnx_dtype",
"torch_nvtx_range_pop",
"torch_nvtx_range_push",
"unflatten_data_using_schema",
]
@@ -7,11 +7,11 @@
import torch

__all__ = [
"StatisticsSubscriber",
"GlobalSubscriberManager",
"inspect_activation",
"StatisticsSubscriber",
"ZeROOffloadSubscriber",
"configure_ort_compatible_zero_stage3",
"inspect_activation",
]

from ._statistics_subscriber import StatisticsSubscriber, _InspectActivation
@@ -52,7 +52,7 @@ def get_primitive_dtype(value):
class _TensorStub:
"""Tensor stub class used to represent model's input or output"""

__slots__ = ["tensor_idx", "name", "dtype", "shape", "shape_dims"]
__slots__ = ["dtype", "name", "shape", "shape_dims", "tensor_idx"]

def __init__(
self,
@@ -3,7 +3,7 @@

def add_name(model):
for i, node in enumerate(model.graph.node):
node.name = "%s_%d" % (node.op_type, i)
node.name = f"{node.op_type}_{i}"


def find_single_output_node(model, arg):
@@ -376,7 +376,7 @@ def main():
# Device (CPU vs CUDA)
if torch.cuda.is_available() and not args.no_cuda:
device = torch.device("cuda")
print("There are %d GPU(s) available." % torch.cuda.device_count())
print(f"There are {torch.cuda.device_count()} GPU(s) available.")
print("We will use the GPU:", torch.cuda.get_device_name(0))
else:
print("No GPU available, using the CPU instead.")
@@ -376,7 +376,7 @@ def main():
# Device (CPU vs CUDA)
if torch.cuda.is_available() and not args.no_cuda:
device = torch.device("cuda")
print("There are %d GPU(s) available." % torch.cuda.device_count())
print(f"There are {torch.cuda.device_count()} GPU(s) available.")
print("We will use the GPU:", torch.cuda.get_device_name(0))
else:
print("No GPU available, using the CPU instead.")
@@ -112,7 +112,7 @@ def demo_checkpoint(rank, world_size, use_ort_module):
# 0 saves it.
dist.barrier()
# configure map_location properly
map_location = {"cuda:%d" % 0: "cuda:%d" % rank}
map_location = {"cuda:0": f"cuda:{rank}"}
ddp_model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=map_location))

optimizer.zero_grad()
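The `map_location` dict tells `torch.load` how to remap saved storages: rank 0 wrote the checkpoint from `cuda:0`, and each loading process redirects those tensors onto its own GPU. A minimal sketch (the rank value is made up; assumes one GPU per rank):

    rank = 3
    map_location = {"cuda:0": f"cuda:{rank}"}  # storages saved on cuda:0 load onto cuda:3
    # ddp_model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=map_location))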
8 changes: 4 additions & 4 deletions orttraining/tools/scripts/gpt2_model_transform.py
@@ -18,7 +18,7 @@

def add_name(model):
for i, node in enumerate(model.graph.node):
node.name = "%s_%d" % (node.op_type, i)
node.name = f"{node.op_type}_{i}"


def find_input_node(model, arg):
@@ -139,7 +139,7 @@ def process_concat(model):
# insert new shape to reshape
for index, reshape_node_index in enumerate(new_nodes):
shape_tensor = numpy_helper.from_array(np.asarray(new_nodes[reshape_node_index], dtype=np.int64))
- const_node = add_const(model, "concat_shape_node_%d" % index, "concat_shape_%d" % index, shape_tensor)
+ const_node = add_const(model, f"concat_shape_node_{index}", f"concat_shape_{index}", shape_tensor)
reshape_node = model.graph.node[reshape_node_index]
reshape_node.input[1] = const_node.output[0]
# delete nodes
@@ -227,13 +227,13 @@ def process_dropout(model):
if node.op_type == "Dropout":
new_dropout = model.graph.node.add()
new_dropout.op_type = "TrainableDropout"
new_dropout.name = "TrainableDropout_%d" % index
new_dropout.name = f"TrainableDropout_{index}"
# make ratio node
ratio = np.asarray([node.attribute[0].f], dtype=np.float32)
print(ratio.shape)
ratio_value = numpy_helper.from_array(ratio)
ratio_node = add_const(
model, "dropout_node_ratio_%d" % index, "dropout_node_ratio_%d" % index, t_value=ratio_value
model, f"dropout_node_ratio_{index}", f"dropout_node_ratio_{index}", t_value=ratio_value
)
print(ratio_node)
new_dropout.input.extend([node.input[0], ratio_node.output[0]])
8 changes: 4 additions & 4 deletions orttraining/tools/scripts/model_transform.py
@@ -18,7 +18,7 @@

def add_name(model):
for i, node in enumerate(model.graph.node):
node.name = "%s_%d" % (node.op_type, i)
node.name = f"{node.op_type}_{i}"


def find_input_node(model, arg):
@@ -120,7 +120,7 @@ def process_concat(model):
# insert new shape to reshape
for index, reshape_node_index in enumerate(new_nodes):
shape_tensor = numpy_helper.from_array(np.asarray(new_nodes[reshape_node_index], dtype=np.int64))
- const_node = add_const(model, "concat_shape_node_%d" % index, "concat_shape_%d" % index, shape_tensor)
+ const_node = add_const(model, f"concat_shape_node_{index}", f"concat_shape_{index}", shape_tensor)
reshape_node = model.graph.node[reshape_node_index]
reshape_node.input[1] = const_node.output[0]
# delete nodes
@@ -251,13 +251,13 @@ def process_dropout(model):
if node.op_type == "Dropout":
new_dropout = model.graph.node.add()
new_dropout.op_type = "TrainableDropout"
- new_dropout.name = "TrainableDropout_%d" % index
+ new_dropout.name = f"TrainableDropout_{index}"
# make ratio node
ratio = np.asarray([node.attribute[0].f], dtype=np.float32)
print(ratio.shape)
ratio_value = numpy_helper.from_array(ratio)
ratio_node = add_const(
model, "dropout_node_ratio_%d" % index, "dropout_node_ratio_%d" % index, t_value=ratio_value
model, f"dropout_node_ratio_{index}", f"dropout_node_ratio_{index}", t_value=ratio_value
)
print(ratio_node)
new_dropout.input.extend([node.input[0], ratio_node.output[0]])