From 40de954d7384d180da2b9648d29e1dd4d774acca Mon Sep 17 00:00:00 2001 From: ljz <470699397@qq.com> Date: Wed, 3 Dec 2025 16:17:15 +0800 Subject: [PATCH 01/13] first step --- tester/gpu_custom_dump.py | 286 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 286 insertions(+) create mode 100644 tester/gpu_custom_dump.py diff --git a/tester/gpu_custom_dump.py b/tester/gpu_custom_dump.py new file mode 100644 index 00000000..ec575962 --- /dev/null +++ b/tester/gpu_custom_dump.py @@ -0,0 +1,286 @@ +import argparse +import os +from datetime import datetime + +import paddle + +from . import APIConfig +from .base import APITestBase + + +class APITestGPUCustomDump(APITestBase): + """ + 在 GPU 与自定义设备(如 XPU / 第三方定制卡)上运行同一 API case, + 计算前向 + 反向结果,并将结果以 npz 形式落盘。 + """ + + def __init__( + self, + api_config, + dump_dir="report/gpu_custom_dump", + test_amp=False, + gpu_id=0, + custom_device_type=None, + custom_device_id=0, + ): + super().__init__(api_config) + self.dump_dir = dump_dir + self.test_amp = test_amp + self.gpu_id = gpu_id + self.custom_device_type = custom_device_type + self.custom_device_id = custom_device_id + + # -------------------- 设备与落盘相关工具函数 -------------------- + def _ensure_dirs(self, path): + os.makedirs(path, exist_ok=True) + + def _to_tensor_list(self, x): + """将输出 / 梯度统一转换成 Tensor 列表,便于直接序列化保存。""" + if x is None: + return None + if isinstance(x, paddle.Tensor): + return [x] + if isinstance(x, (list, tuple)): + tensors = [t for t in x if isinstance(t, paddle.Tensor)] + return tensors or None + return None + + def _dump_results(self, tag, output, grads): + """ + 将指定设备的前向 / 反向结果直接保存为 Tensor 列表(使用 paddle.save): + //{tag}_forward.pdtensor + //{tag}_grad.pdtensor + """ + api_name = self.api_config.config.replace("/", "_").replace(" ", "_") + dump_path = os.path.join(self.dump_dir, api_name) + self._ensure_dirs(dump_path) + + out_list = self._to_tensor_list(output) + grad_list = self._to_tensor_list(grads) + + if out_list is not None: + paddle.save(out_list, os.path.join(dump_path, f"{tag}_forward.pdtensor")) + if grad_list is not None: + paddle.save(grad_list, os.path.join(dump_path, f"{tag}_grad.pdtensor")) + + def _run_on_device(self, device_str): + """ + 在指定设备上运行一次前向 + 反向,返回 (output, grads)。 + device_str 形如:'gpu:0', 'xpu:0', 'iluvatar_gpu:0' 等。 + """ + import paddle + + try: + paddle.set_device(device_str) + except Exception as e: + print(f"[device set error] {device_str} -> {e}", flush=True) + return None, None + + if not self.gen_paddle_input(): + print(f"[gen_paddle_input failed] device={device_str}", flush=True) + return None, None + + # 前向 + try: + if self.test_amp: + with paddle.amp.auto_cast(): + output = self.paddle_api(*tuple(self.paddle_args), **self.paddle_kwargs) + else: + output = self.paddle_api(*tuple(self.paddle_args), **self.paddle_kwargs) + except Exception as err: + print(f"[forward error] device={device_str} {self.api_config.config}\n{err}", flush=True) + return None, None + + # 反向 + out_grads = None + if self.need_check_grad(): + inputs_list = self.get_paddle_input_list() + try: + result_outputs, result_outputs_grads = self.gen_paddle_output_and_output_grad(output) + except Exception as grad_prepare_err: + print( + f"[backward prepare error] device={device_str} {self.api_config.config}\n{grad_prepare_err}", + flush=True, + ) + return output, None + + if inputs_list and result_outputs and result_outputs_grads: + try: + out_grads = paddle.grad( + result_outputs, + inputs_list, + grad_outputs=result_outputs_grads, + allow_unused=True, + ) + except Exception as grad_err: + print( + f"[backward error] device={device_str} {self.api_config.config}\n{grad_err}", + flush=True, + ) + out_grads = None + else: + print( + f"[backward skip] device={device_str} no valid inputs or outputs for gradient computation", + flush=True, + ) + + return output, out_grads + + # -------------------- 主流程:GPU vs Custom 设备 -------------------- + def test(self): + # 1. 是否跳过 + if self.need_skip(): + print("[Skip]", self.api_config.config, flush=True) + return + + # 2. 解析 Paddle API 信息 & 生成 numpy 输入 + if not self.ana_paddle_api_info(): + print("[ana_paddle_api_info failed]", self.api_config.config, flush=True) + return + + try: + if not self.gen_numpy_input(): + print("[gen_numpy_input failed]", self.api_config.config, flush=True) + return + except Exception as err: + print("[numpy error]", self.api_config.config, "\n", str(err), flush=True) + return + + # 3. 确定 GPU / 自定义设备字符串 + gpu_device_str = f"gpu:{self.gpu_id}" + + if self.custom_device_type is None: + # 自动探测:优先 XPU,再尝试自定义设备 + try: + if paddle.device.is_compiled_with_xpu(): + self.custom_device_type = "xpu" + else: + custom_types = paddle.device.get_all_custom_device_type() + if custom_types: + self.custom_device_type = custom_types[0] + else: + print( + "[no custom device available] " + "compiled_without_xpu and no custom_device_type found.", + self.api_config.config, + flush=True, + ) + return + except Exception as e: + print(f"[detect custom device error] {e}", flush=True) + return + + custom_device_str = ( + f"{self.custom_device_type}:{self.custom_device_id}" + if self.custom_device_type != "xpu" + else f"xpu:{self.custom_device_id}" + ) + + print( + f"{datetime.now()} [Begin] {self.api_config.config}\n" + f" GPU device : {gpu_device_str}\n" + f" Custom device: {custom_device_str}", + flush=True, + ) + + # 4. GPU 上运行 + gpu_out, gpu_grads = self._run_on_device(gpu_device_str) + if gpu_out is None: + print("[gpu execution failed]", self.api_config.config, flush=True) + else: + self._dump_results("gpu", gpu_out, gpu_grads) + print("[gpu dump done]", self.api_config.config, flush=True) + + # 5. 自定义设备 / XPU 上运行 + custom_out, custom_grads = self._run_on_device(custom_device_str) + if custom_out is None: + print(f"[{custom_device_str} execution failed]", self.api_config.config, flush=True) + else: + tag = self.custom_device_type if self.custom_device_type is not None else "custom" + self._dump_results(tag, custom_out, custom_grads) + print(f"[{tag} dump done]", self.api_config.config, flush=True) + + +def parse_bool(v): + if isinstance(v, bool): + return v + s = str(v).lower() + if s in {"true", "1", "yes", "y"}: + return True + if s in {"false", "0", "no", "n"}: + return False + raise argparse.ArgumentTypeError(f"Invalid bool value: {v}") + + +def main(): + parser = argparse.ArgumentParser( + description="在 GPU / 自定义设备 上运行 API case,并将前向 + 反向结果以 npz 落盘。" + ) + parser.add_argument( + "--api_config", + type=str, + required=True, + help="单条 API 配置(与 engine 中的 api_config 字符串格式一致)", + ) + parser.add_argument( + "--dump_dir", + type=str, + default="report/gpu_custom_dump", + help="结果保存目录(npz 文件会按 API 配置分子目录存放)", + ) + parser.add_argument( + "--test_amp", + type=parse_bool, + default=False, + help="是否在前向中启用 AMP 自动混合精度", + ) + parser.add_argument( + "--gpu_id", + type=int, + default=0, + help="使用的 GPU 设备号(形如 gpu:)", + ) + parser.add_argument( + "--custom_device_type", + type=str, + default=None, + help="自定义设备类型名称,例如 'xpu'、'iluvatar_gpu' 等;" + "留空则自动探测:优先 XPU,再尝试 paddle 自定义设备。", + ) + parser.add_argument( + "--custom_device_id", + type=int, + default=0, + help="自定义设备 ID,如 xpu:0 / iluvatar_gpu:0 中的 0", + ) + + args = parser.parse_args() + + print(f"Options: {vars(args)}", flush=True) + + try: + api_config = APIConfig(args.api_config.strip()) + except Exception as err: + print(f"[config parse error] {args.api_config} {str(err)}", flush=True) + return + + case = APITestGPUCustomDump( + api_config, + dump_dir=args.dump_dir, + test_amp=args.test_amp, + gpu_id=args.gpu_id, + custom_device_type=args.custom_device_type, + custom_device_id=args.custom_device_id, + ) + try: + case.test() + finally: + case.clear_tensor() + del case + del api_config + + +if __name__ == "__main__": + main() + + From e86c71f7752ce3b60105da207b19987014d66203 Mon Sep 17 00:00:00 2001 From: ljz <470699397@qq.com> Date: Wed, 3 Dec 2025 18:30:48 +0800 Subject: [PATCH 02/13] test0 --- tester/gpu_custom_dump.py | 171 +++++++++++++++----------------------- 1 file changed, 66 insertions(+), 105 deletions(-) diff --git a/tester/gpu_custom_dump.py b/tester/gpu_custom_dump.py index ec575962..7625895b 100644 --- a/tester/gpu_custom_dump.py +++ b/tester/gpu_custom_dump.py @@ -1,6 +1,8 @@ import argparse import os +import hashlib from datetime import datetime +from engineV2 import detect_device_type import paddle @@ -9,33 +11,20 @@ class APITestGPUCustomDump(APITestBase): - """ - 在 GPU 与自定义设备(如 XPU / 第三方定制卡)上运行同一 API case, - 计算前向 + 反向结果,并将结果以 npz 形式落盘。 - """ - def __init__( self, api_config, - dump_dir="report/gpu_custom_dump", + dump_dir="gpu_custom_dump", test_amp=False, - gpu_id=0, - custom_device_type=None, - custom_device_id=0, ): super().__init__(api_config) self.dump_dir = dump_dir self.test_amp = test_amp - self.gpu_id = gpu_id - self.custom_device_type = custom_device_type - self.custom_device_id = custom_device_id - # -------------------- 设备与落盘相关工具函数 -------------------- def _ensure_dirs(self, path): os.makedirs(path, exist_ok=True) def _to_tensor_list(self, x): - """将输出 / 梯度统一转换成 Tensor 列表,便于直接序列化保存。""" if x is None: return None if isinstance(x, paddle.Tensor): @@ -46,11 +35,6 @@ def _to_tensor_list(self, x): return None def _dump_results(self, tag, output, grads): - """ - 将指定设备的前向 / 反向结果直接保存为 Tensor 列表(使用 paddle.save): - //{tag}_forward.pdtensor - //{tag}_grad.pdtensor - """ api_name = self.api_config.config.replace("/", "_").replace(" ", "_") dump_path = os.path.join(self.dump_dir, api_name) self._ensure_dirs(dump_path) @@ -58,18 +42,24 @@ def _dump_results(self, tag, output, grads): out_list = self._to_tensor_list(output) grad_list = self._to_tensor_list(grads) + key = f"{tag}-{api_name}" + sha16 = hashlib.sha256(key.encode("utf-8")).hexdigest()[:16] + file_prefix = f"{tag}-{api_name}-{sha16}" + + forward_path = None + grad_path = None + if out_list is not None: - paddle.save(out_list, os.path.join(dump_path, f"{tag}_forward.pdtensor")) + forward_path = os.path.join(dump_path, f"{file_prefix}_forward.pdtensor") + paddle.save(out_list, forward_path) + if grad_list is not None: - paddle.save(grad_list, os.path.join(dump_path, f"{tag}_grad.pdtensor")) + grad_path = os.path.join(dump_path, f"{file_prefix}_grad.pdtensor") + paddle.save(grad_list, grad_path) - def _run_on_device(self, device_str): - """ - 在指定设备上运行一次前向 + 反向,返回 (output, grads)。 - device_str 形如:'gpu:0', 'xpu:0', 'iluvatar_gpu:0' 等。 - """ - import paddle + return forward_path, grad_path + def _run_on_device(self, device_str): try: paddle.set_device(device_str) except Exception as e: @@ -80,7 +70,6 @@ def _run_on_device(self, device_str): print(f"[gen_paddle_input failed] device={device_str}", flush=True) return None, None - # 前向 try: if self.test_amp: with paddle.amp.auto_cast(): @@ -91,7 +80,6 @@ def _run_on_device(self, device_str): print(f"[forward error] device={device_str} {self.api_config.config}\n{err}", flush=True) return None, None - # 反向 out_grads = None if self.need_check_grad(): inputs_list = self.get_paddle_input_list() @@ -126,14 +114,11 @@ def _run_on_device(self, device_str): return output, out_grads - # -------------------- 主流程:GPU vs Custom 设备 -------------------- def test(self): - # 1. 是否跳过 if self.need_skip(): print("[Skip]", self.api_config.config, flush=True) return - # 2. 解析 Paddle API 信息 & 生成 numpy 输入 if not self.ana_paddle_api_info(): print("[ana_paddle_api_info failed]", self.api_config.config, flush=True) return @@ -146,59 +131,64 @@ def test(self): print("[numpy error]", self.api_config.config, "\n", str(err), flush=True) return - # 3. 确定 GPU / 自定义设备字符串 - gpu_device_str = f"gpu:{self.gpu_id}" + device_type = detect_device_type() + try: + if paddle.device.is_compiled_with_cuda(): + device_type = "gpu" + elif paddle.device.is_compiled_with_xpu(): + device_type = "xpu" + else: + custom_types = paddle.device.get_all_custom_device_type() + if custom_types: + device_type = custom_types[0] + except Exception as e: + print(f"[detect device error] {e}", flush=True) + return - if self.custom_device_type is None: - # 自动探测:优先 XPU,再尝试自定义设备 - try: - if paddle.device.is_compiled_with_xpu(): - self.custom_device_type = "xpu" - else: - custom_types = paddle.device.get_all_custom_device_type() - if custom_types: - self.custom_device_type = custom_types[0] - else: - print( - "[no custom device available] " - "compiled_without_xpu and no custom_device_type found.", - self.api_config.config, - flush=True, - ) - return - except Exception as e: - print(f"[detect custom device error] {e}", flush=True) - return + if device_type is None: + print("[no available device]", self.api_config.config, flush=True) + return - custom_device_str = ( - f"{self.custom_device_type}:{self.custom_device_id}" - if self.custom_device_type != "xpu" - else f"xpu:{self.custom_device_id}" - ) + device_str = f"{device_type}:0" print( f"{datetime.now()} [Begin] {self.api_config.config}\n" - f" GPU device : {gpu_device_str}\n" - f" Custom device: {custom_device_str}", + f" Device: {device_str}", flush=True, ) - # 4. GPU 上运行 - gpu_out, gpu_grads = self._run_on_device(gpu_device_str) - if gpu_out is None: - print("[gpu execution failed]", self.api_config.config, flush=True) + out, grads = self._run_on_device(device_str) + if out is None: + print(f"[{device_str} execution failed]", self.api_config.config, flush=True) else: - self._dump_results("gpu", gpu_out, gpu_grads) - print("[gpu dump done]", self.api_config.config, flush=True) + forward_path, grad_path = self._dump_results(device_type, out, grads) + print(f"[{device_type} dump done]", self.api_config.config, flush=True) - # 5. 自定义设备 / XPU 上运行 - custom_out, custom_grads = self._run_on_device(custom_device_str) - if custom_out is None: - print(f"[{custom_device_str} execution failed]", self.api_config.config, flush=True) - else: - tag = self.custom_device_type if self.custom_device_type is not None else "custom" - self._dump_results(tag, custom_out, custom_grads) - print(f"[{tag} dump done]", self.api_config.config, flush=True) + if forward_path is not None: + try: + loaded_forward = paddle.load(forward_path) + print(f"[loaded forward] {forward_path}") + for i, t in enumerate(loaded_forward): + arr = t.numpy().flatten() + print( + f" forward[{i}] shape={t.shape}, dtype={t.dtype}, " + f"first_values={arr[:10]}" + ) + except Exception as e: + print(f"[load forward error] {forward_path} -> {e}", flush=True) + + if grad_path is not None: + try: + loaded_grads = paddle.load(grad_path) + print(f"[loaded grad] {grad_path}") + for i, t in enumerate(loaded_grads): + arr = t.numpy().flatten() + print( + f" grad[{i}] shape={t.shape}, dtype={t.dtype}, " + f"first_values={arr[:10]}" + ) + except Exception as e: + print(f"[load grad error] {grad_path} -> {e}", flush=True) def parse_bool(v): @@ -213,45 +203,21 @@ def parse_bool(v): def main(): - parser = argparse.ArgumentParser( - description="在 GPU / 自定义设备 上运行 API case,并将前向 + 反向结果以 npz 落盘。" - ) + parser = argparse.ArgumentParser() parser.add_argument( "--api_config", type=str, required=True, - help="单条 API 配置(与 engine 中的 api_config 字符串格式一致)", ) parser.add_argument( "--dump_dir", type=str, default="report/gpu_custom_dump", - help="结果保存目录(npz 文件会按 API 配置分子目录存放)", ) parser.add_argument( "--test_amp", type=parse_bool, default=False, - help="是否在前向中启用 AMP 自动混合精度", - ) - parser.add_argument( - "--gpu_id", - type=int, - default=0, - help="使用的 GPU 设备号(形如 gpu:)", - ) - parser.add_argument( - "--custom_device_type", - type=str, - default=None, - help="自定义设备类型名称,例如 'xpu'、'iluvatar_gpu' 等;" - "留空则自动探测:优先 XPU,再尝试 paddle 自定义设备。", - ) - parser.add_argument( - "--custom_device_id", - type=int, - default=0, - help="自定义设备 ID,如 xpu:0 / iluvatar_gpu:0 中的 0", ) args = parser.parse_args() @@ -268,9 +234,6 @@ def main(): api_config, dump_dir=args.dump_dir, test_amp=args.test_amp, - gpu_id=args.gpu_id, - custom_device_type=args.custom_device_type, - custom_device_id=args.custom_device_id, ) try: case.test() @@ -282,5 +245,3 @@ def main(): if __name__ == "__main__": main() - - From 137d40fe9e8c4d8e87e6e3ef770290846890e19d Mon Sep 17 00:00:00 2001 From: ljz <470699397@qq.com> Date: Fri, 5 Dec 2025 17:19:00 +0800 Subject: [PATCH 03/13] add customvsgpu --- engineV2.py | 46 +++- tester/__init__.py | 5 + tester/gpu_custom_dump.py | 247 -------------------- tester/paddle_device_vs_gpu.py | 411 +++++++++++++++++++++++++++++++++ 4 files changed, 459 insertions(+), 250 deletions(-) delete mode 100644 tester/gpu_custom_dump.py create mode 100644 tester/paddle_device_vs_gpu.py diff --git a/engineV2.py b/engineV2.py index 01c92a74..f0785939 100644 --- a/engineV2.py +++ b/engineV2.py @@ -29,6 +29,7 @@ APITestPaddleTorchGPUPerformance, APITestAccuracyStable, APITestCustomDeviceVSCPU, + APITestPaddleDeviceVSGPU, ) import torch import paddle @@ -38,7 +39,10 @@ os.environ["FLAGS_use_system_allocator"] = "1" os.environ["NVIDIA_TF32_OVERRIDE"] = "0" -VALID_TEST_ARGS = {"test_amp", "test_backward", "atol", "rtol", "test_tol"} +VALID_TEST_ARGS = { + "test_amp", "test_backward", "atol", "rtol", "test_tol", + "operation_mode", "bos_path", "target_device_type", "random_seed" +} DEVICE_TYPE = None DEVICE_TYPE_DETECTED = False @@ -384,7 +388,8 @@ def pid_exists(pid): APITestPaddleOnly, APITestPaddleTorchGPUPerformance, APITestTorchGPUPerformance, - APITestCustomDeviceVSCPU) + APITestCustomDeviceVSCPU, + APITestPaddleDeviceVSGPU) test_classes = { "APIConfig": APIConfig, @@ -395,7 +400,8 @@ def pid_exists(pid): "APITestTorchGPUPerformance": APITestTorchGPUPerformance, "APITestPaddleTorchGPUPerformance": APITestPaddleTorchGPUPerformance, "APITestAccuracyStable": APITestAccuracyStable, - "APITestCustomDeviceVSCPU": APITestCustomDeviceVSCPU + "APITestCustomDeviceVSCPU": APITestCustomDeviceVSCPU, + "APITestPaddleDeviceVSGPU": APITestPaddleDeviceVSGPU } globals().update(test_classes) @@ -466,6 +472,7 @@ def run_test_case(api_config_str, options): "paddle_torch_gpu_performance": APITestPaddleTorchGPUPerformance, "accuracy_stable": APITestAccuracyStable, "paddle_custom_device": APITestCustomDeviceVSCPU, + "custom_device_vs_gpu": APITestPaddleDeviceVSGPU, } test_class = next( (cls for opt, cls in option_to_class.items() if getattr(options, opt, False)), @@ -646,6 +653,30 @@ def main(): default=0, help="The numpy random seed ", ) + parser.add_argument( + "--custom_device_vs_gpu", + type=parse_bool, + default=False, + help="test paddle api on custom device vs GPU", + ) + parser.add_argument( + "--operation_mode", + type=str, + choices=["upload", "download"], + help="Operation mode: upload or download", + ) + parser.add_argument( + "--bos_path", + type=str, + default="", + help="BOS storage path (required when operation_mode is specified)", + ) + parser.add_argument( + "--target_device_type", + type=str, + choices=["gpu", "paddle_device"], + help="Target device type for download mode", + ) options = parser.parse_args() print(f"Options: {vars(options)}", flush=True) @@ -661,6 +692,7 @@ def main(): options.paddle_torch_gpu_performance, options.accuracy_stable, options.paddle_custom_device, + options.custom_device_vs_gpu, ] if len([m for m in mode if m is True]) != 1: print( @@ -673,10 +705,18 @@ def main(): "--paddle_torch_gpu_performance" "--accuracy_stable" "--paddle_custom_device" + "--custom_device_vs_gpu" " to True.", flush=True, ) return + if options.custom_device_vs_gpu: + if options.operation_mode and not options.bos_path: + print("--bos_path is required when --operation_mode is specified", flush=True) + return + if options.operation_mode == "download" and not options.target_device_type: + print("--target_device_type is required in download mode", flush=True) + return if options.test_tol and not options.accuracy: print(f"--test_tol takes effect when --accuracy is True.", flush=True) if options.test_backward and not options.paddle_cinn: diff --git a/tester/__init__.py b/tester/__init__.py index b758104a..01d73b45 100644 --- a/tester/__init__.py +++ b/tester/__init__.py @@ -12,6 +12,7 @@ 'APITestPaddleTorchGPUPerformance', 'APITestAccuracyStable', 'APITestCustomDeviceVSCPU', + 'APITestPaddleDeviceVSGPU', 'paddle_to_torch', 'TensorConfig', 'APIConfig', @@ -32,6 +33,7 @@ from .paddle_cinn_vs_dygraph import APITestCINNVSDygraph from .accuracy_stable import APITestAccuracyStable from .paddle_device_vs_cpu import APITestCustomDeviceVSCPU + from .paddle_device_vs_gpu import APITestPaddleDeviceVSGPU from . import paddle_to_torch from .api_config import ( TensorConfig, @@ -74,6 +76,9 @@ def __getattr__(name: str) -> Any: elif name == 'APITestCustomDeviceVSCPU': from .paddle_device_vs_cpu import APITestCustomDeviceVSCPU return APITestCustomDeviceVSCPU + elif name == 'APITestPaddleDeviceVSGPU': + from .paddle_device_vs_gpu import APITestPaddleDeviceVSGPU + return APITestPaddleDeviceVSGPU elif name == 'paddle_to_torch': from . import paddle_to_torch return paddle_to_torch diff --git a/tester/gpu_custom_dump.py b/tester/gpu_custom_dump.py deleted file mode 100644 index 7625895b..00000000 --- a/tester/gpu_custom_dump.py +++ /dev/null @@ -1,247 +0,0 @@ -import argparse -import os -import hashlib -from datetime import datetime -from engineV2 import detect_device_type - -import paddle - -from . import APIConfig -from .base import APITestBase - - -class APITestGPUCustomDump(APITestBase): - def __init__( - self, - api_config, - dump_dir="gpu_custom_dump", - test_amp=False, - ): - super().__init__(api_config) - self.dump_dir = dump_dir - self.test_amp = test_amp - - def _ensure_dirs(self, path): - os.makedirs(path, exist_ok=True) - - def _to_tensor_list(self, x): - if x is None: - return None - if isinstance(x, paddle.Tensor): - return [x] - if isinstance(x, (list, tuple)): - tensors = [t for t in x if isinstance(t, paddle.Tensor)] - return tensors or None - return None - - def _dump_results(self, tag, output, grads): - api_name = self.api_config.config.replace("/", "_").replace(" ", "_") - dump_path = os.path.join(self.dump_dir, api_name) - self._ensure_dirs(dump_path) - - out_list = self._to_tensor_list(output) - grad_list = self._to_tensor_list(grads) - - key = f"{tag}-{api_name}" - sha16 = hashlib.sha256(key.encode("utf-8")).hexdigest()[:16] - file_prefix = f"{tag}-{api_name}-{sha16}" - - forward_path = None - grad_path = None - - if out_list is not None: - forward_path = os.path.join(dump_path, f"{file_prefix}_forward.pdtensor") - paddle.save(out_list, forward_path) - - if grad_list is not None: - grad_path = os.path.join(dump_path, f"{file_prefix}_grad.pdtensor") - paddle.save(grad_list, grad_path) - - return forward_path, grad_path - - def _run_on_device(self, device_str): - try: - paddle.set_device(device_str) - except Exception as e: - print(f"[device set error] {device_str} -> {e}", flush=True) - return None, None - - if not self.gen_paddle_input(): - print(f"[gen_paddle_input failed] device={device_str}", flush=True) - return None, None - - try: - if self.test_amp: - with paddle.amp.auto_cast(): - output = self.paddle_api(*tuple(self.paddle_args), **self.paddle_kwargs) - else: - output = self.paddle_api(*tuple(self.paddle_args), **self.paddle_kwargs) - except Exception as err: - print(f"[forward error] device={device_str} {self.api_config.config}\n{err}", flush=True) - return None, None - - out_grads = None - if self.need_check_grad(): - inputs_list = self.get_paddle_input_list() - try: - result_outputs, result_outputs_grads = self.gen_paddle_output_and_output_grad(output) - except Exception as grad_prepare_err: - print( - f"[backward prepare error] device={device_str} {self.api_config.config}\n{grad_prepare_err}", - flush=True, - ) - return output, None - - if inputs_list and result_outputs and result_outputs_grads: - try: - out_grads = paddle.grad( - result_outputs, - inputs_list, - grad_outputs=result_outputs_grads, - allow_unused=True, - ) - except Exception as grad_err: - print( - f"[backward error] device={device_str} {self.api_config.config}\n{grad_err}", - flush=True, - ) - out_grads = None - else: - print( - f"[backward skip] device={device_str} no valid inputs or outputs for gradient computation", - flush=True, - ) - - return output, out_grads - - def test(self): - if self.need_skip(): - print("[Skip]", self.api_config.config, flush=True) - return - - if not self.ana_paddle_api_info(): - print("[ana_paddle_api_info failed]", self.api_config.config, flush=True) - return - - try: - if not self.gen_numpy_input(): - print("[gen_numpy_input failed]", self.api_config.config, flush=True) - return - except Exception as err: - print("[numpy error]", self.api_config.config, "\n", str(err), flush=True) - return - - device_type = detect_device_type() - try: - if paddle.device.is_compiled_with_cuda(): - device_type = "gpu" - elif paddle.device.is_compiled_with_xpu(): - device_type = "xpu" - else: - custom_types = paddle.device.get_all_custom_device_type() - if custom_types: - device_type = custom_types[0] - except Exception as e: - print(f"[detect device error] {e}", flush=True) - return - - if device_type is None: - print("[no available device]", self.api_config.config, flush=True) - return - - device_str = f"{device_type}:0" - - print( - f"{datetime.now()} [Begin] {self.api_config.config}\n" - f" Device: {device_str}", - flush=True, - ) - - out, grads = self._run_on_device(device_str) - if out is None: - print(f"[{device_str} execution failed]", self.api_config.config, flush=True) - else: - forward_path, grad_path = self._dump_results(device_type, out, grads) - print(f"[{device_type} dump done]", self.api_config.config, flush=True) - - if forward_path is not None: - try: - loaded_forward = paddle.load(forward_path) - print(f"[loaded forward] {forward_path}") - for i, t in enumerate(loaded_forward): - arr = t.numpy().flatten() - print( - f" forward[{i}] shape={t.shape}, dtype={t.dtype}, " - f"first_values={arr[:10]}" - ) - except Exception as e: - print(f"[load forward error] {forward_path} -> {e}", flush=True) - - if grad_path is not None: - try: - loaded_grads = paddle.load(grad_path) - print(f"[loaded grad] {grad_path}") - for i, t in enumerate(loaded_grads): - arr = t.numpy().flatten() - print( - f" grad[{i}] shape={t.shape}, dtype={t.dtype}, " - f"first_values={arr[:10]}" - ) - except Exception as e: - print(f"[load grad error] {grad_path} -> {e}", flush=True) - - -def parse_bool(v): - if isinstance(v, bool): - return v - s = str(v).lower() - if s in {"true", "1", "yes", "y"}: - return True - if s in {"false", "0", "no", "n"}: - return False - raise argparse.ArgumentTypeError(f"Invalid bool value: {v}") - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--api_config", - type=str, - required=True, - ) - parser.add_argument( - "--dump_dir", - type=str, - default="report/gpu_custom_dump", - ) - parser.add_argument( - "--test_amp", - type=parse_bool, - default=False, - ) - - args = parser.parse_args() - - print(f"Options: {vars(args)}", flush=True) - - try: - api_config = APIConfig(args.api_config.strip()) - except Exception as err: - print(f"[config parse error] {args.api_config} {str(err)}", flush=True) - return - - case = APITestGPUCustomDump( - api_config, - dump_dir=args.dump_dir, - test_amp=args.test_amp, - ) - try: - case.test() - finally: - case.clear_tensor() - del case - del api_config - - -if __name__ == "__main__": - main() diff --git a/tester/paddle_device_vs_gpu.py b/tester/paddle_device_vs_gpu.py new file mode 100644 index 00000000..fc145132 --- /dev/null +++ b/tester/paddle_device_vs_gpu.py @@ -0,0 +1,411 @@ +import hashlib +import json +import os +import subprocess +import tempfile +import time +from pathlib import Path + +import numpy as np +import paddle + +from .api_config.log_writer import write_to_log +from .paddle_device_vs_cpu import APITestCustomDeviceVSCPU + + +class APITestPaddleDeviceVSGPU(APITestCustomDeviceVSCPU): + def __init__(self, api_config, **kwargs): + # 继承 CustomDevice vs CPU 的基本功能 + super().__init__(api_config, **kwargs) + + # 新增参数 + self.operation_mode = kwargs.get("operation_mode", None) + self.bos_path = kwargs.get("bos_path", "") + self.target_device_type = kwargs.get("target_device_type", "") + self.random_seed = kwargs.get("random_seed", 0) + self.atol = kwargs.get("atol", 1e-2) + self.rtol = kwargs.get("rtol", 1e-2) + + # 设置随机种子确保一致性 + if self.random_seed != 0: + np.random.seed(self.random_seed) + paddle.seed(self.random_seed) + + def _get_config_hash(self): + """生成API配置的哈希值,用于文件名""" + config_str = json.dumps({ + "api_name": self.api_config.api_name, + "args": [str(arg) for arg in self.api_config.args], + "kwargs": {k: str(v) for k, v in self.api_config.kwargs.items()} + }, sort_keys=True) + return hashlib.md5(config_str.encode()).hexdigest()[:16] + + def _get_local_device_type(self): + """获取当前设备的类型""" + try: + if torch.cuda.is_available(): # 检查GPU是否可用 + return "gpu" + elif self.check_xpu_available(): + return "xpu" + elif self.check_custom_device_available(): + return self.custom_device_type + else: + return "cpu" + except: + return "cpu" + + def _get_filename(self, device_type=None): + """生成PDTensor文件名""" + if device_type is None: + device_type = self._get_local_device_type() + return f"{device_type}-{self.random_seed}-{self._get_config_hash()}.pdtensor" + + def _save_tensor_locally(self, output, grads=None): + """保存结果到本地PDTensor文件""" + # 保存到临时文件 + temp_dir = tempfile.gettempdir() + filename = self._get_filename().replace('.npz', '.pdtensor') + local_path = Path(temp_dir) / filename + + # 使用paddle.save保存张量数据 + save_data = {'output': output} + if grads is not None: + save_data['grads'] = grads + + paddle.save(save_data, str(local_path)) + print(f"[upload] Saved pdtensor file: {local_path}", flush=True) + return local_path + + def _upload_to_bos(self, local_path): + """上传文件到指定路径,支持本地和BOS""" + if not self.bos_path: + print(f"[upload] No bos_path specified, skip upload", flush=True) + return + + try: + # 判断路径类型:本地路径还是BOS路径 + if self.bos_path.startswith("bos://"): + # BOS路径:使用bcecmd工具上传 + remote_path = f"{self.bos_path.rstrip('/')}/{local_path.name}" + print(f"[upload] Starting upload to BOS: {remote_path}", flush=True) + + cmd = ["bcecmd", "bos", "cp", str(local_path), remote_path] + result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) + + if result.returncode == 0: + print(f"[upload] Upload succeeded: {remote_path}", flush=True) + local_path.unlink(missing_ok=True) + else: + print(f"[upload] Upload failed: {remote_path}, error: {result.stderr}", flush=True) + else: + # 本地路径:直接复制文件 + local_bos_path = Path(self.bos_path).resolve() + remote_path = local_bos_path / local_path.name + + # 确保目录存在 + local_bos_path.mkdir(parents=True, exist_ok=True) + print(f"[upload] Copying file to local path: {remote_path}", flush=True) + + # 复制文件 + import shutil + shutil.copy2(local_path, remote_path) + print(f"[upload] File copied successfully: {remote_path}", flush=True) + + # 删除临时文件 + local_path.unlink(missing_ok=True) + + except Exception as e: + print(f"[upload] Upload failed: {e}", flush=True) + + def _download_from_bos(self, filename): + """从指定路径下载文件,支持本地和BOS""" + if not self.bos_path: + print(f"[download] No bos_path specified, skip download", flush=True) + return None + + temp_dir = tempfile.gettempdir() + local_path = Path(temp_dir) / filename + + if local_path.exists(): + print(f"[download] File already exists locally: {local_path}", flush=True) + return local_path + + try: + # 判断路径类型:本地路径还是BOS路径 + if self.bos_path.startswith("bos://"): + # BOS路径:使用bcecmd工具下载 + remote_path = f"{self.bos_path.rstrip('/')}/{filename}" + print(f"[download] Starting download from BOS: {remote_path}", flush=True) + + cmd = ["bcecmd", "bos", "cp", remote_path, str(local_path)] + result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) + + if result.returncode == 0: + print(f"[download] Download succeeded: {local_path}", flush=True) + return local_path + else: + print(f"[download] Download failed: {remote_path}, error: {result.stderr}", flush=True) + return None + else: + # 本地路径:直接复制文件 + local_bos_path = Path(self.bos_path).resolve() + remote_path = local_bos_path / filename + + print(f"[download] Copying file from local path: {remote_path}", flush=True) + + if not remote_path.exists(): + print(f"[download] File not found: {remote_path}", flush=True) + return None + + # 复制文件 + import shutil + shutil.copy2(remote_path, local_path) + print(f"[download] File copied successfully: {local_path}", flush=True) + return local_path + + except Exception as e: + print(f"[download] Download failed: {e}", flush=True) + return None + + def _run_paddle_on_gpu(self): + """在GPU上运行Paddle实现""" + try: + # 设置GPU设备 + paddle.set_device("gpu:0") + + # 解析Paddle API信息 + if not self.ana_paddle_api_info(): + print("ana_paddle_api_info failed", flush=True) + return None, None + + # 生成输入数据 + if not self.gen_numpy_input(): + print("gen_numpy_input failed", flush=True) + return None, None + + if not self.gen_paddle_input(): + print("gen_paddle_input failed", flush=True) + return None, None + + # 执行Forward + paddle_output = self.paddle_api(*tuple(self.paddle_args), **self.paddle_kwargs) + + # 执行Backward(如果需要) + paddle_grads = None + if self.need_check_grad(): + inputs_list = self.get_paddle_input_list() + result_outputs, result_outputs_grads = self.gen_paddle_output_and_output_grad(paddle_output) + if inputs_list and result_outputs and result_outputs_grads: + paddle_grads = paddle.grad( + outputs=result_outputs, + inputs=inputs_list, + grad_outputs=result_outputs_grads, + allow_unused=True + ) + + return paddle_output, paddle_grads + + except Exception as e: + print(f"[paddle gpu error] {self.api_config.config}: {e}", flush=True) + write_to_log("paddle_error", self.api_config.config) + return None, None + + def _run_paddle_on_custom_device(self): + """在Paddle自定义设备上运行""" + try: + paddle_device_type = "cpu" # 默认为CPU + + # 设置自定义设备 + if self.check_xpu_available(): + paddle.set_device(f"xpu:{self.xpu_device_id}") + paddle_device_type = "xpu" + elif self.check_custom_device_available(): + paddle.set_device(f"{self.custom_device_type}:{self.custom_device_id}") + paddle_device_type = self.custom_device_type + else: + print(f"[error] No custom device available", flush=True) + return None, None + + # 解析Paddle API信息 + if not self.ana_paddle_api_info(): + print("ana_paddle_api_info failed", flush=True) + return None, None + + # 生成输入数据 + if not self.gen_numpy_input(): + print("gen_numpy_input failed", flush=True) + return None, None + + if not self.gen_paddle_input(): + print("gen_paddle_input failed", flush=True) + return None, None + + # 执行Forward + paddle_output = self.paddle_api(*tuple(self.paddle_args), **self.paddle_kwargs) + + # 执行Backward(如果需要) + paddle_grads = None + if self.need_check_grad(): + inputs_list = self.get_paddle_input_list() + result_outputs, result_outputs_grads = self.gen_paddle_output_and_output_grad(paddle_output) + if inputs_list and result_outputs and result_outputs_grads: + paddle_grads = paddle.grad( + outputs=result_outputs, + inputs=inputs_list, + grad_outputs=result_outputs_grads, + allow_unused=True + ) + + return paddle_output, paddle_grads + + except Exception as e: + print(f"[paddle {paddle_device_type} error] {self.api_config.config}: {e}", flush=True) + write_to_log("paddle_error", self.api_config.config) + return None, None + + def _compare_with_downloaded(self, local_output, local_grads, downloaded_tensor): + """与下载的结果进行对比""" + try: + print(f"[compare] Comparing results for {self.api_config.config}", flush=True) + + # 加载下载的数据 + remote_data = paddle.load(str(downloaded_tensor)) + remote_output = remote_data['output'] + + # 对比Forward输出(直接使用Paddle对比) + try: + if isinstance(local_output, paddle.Tensor) and isinstance(remote_output, paddle.Tensor): + # 使用Paddle的对比方法 + np.testing.assert_allclose( + local_output.numpy(), remote_output.numpy(), + atol=self.atol, rtol=self.rtol, equal_nan=True + ) + elif isinstance(local_output, (list, tuple)) and isinstance(remote_output, (list, tuple)): + # 列表或元组对比 + for i, (local_item, remote_item) in enumerate(zip(local_output, remote_output)): + if isinstance(local_item, paddle.Tensor) and isinstance(remote_item, paddle.Tensor): + np.testing.assert_allclose( + local_item.numpy(), remote_item.numpy(), + atol=self.atol, rtol=self.rtol, equal_nan=True + ) + print(f"[compare] Forward output[{i}] comparison passed", flush=True) + else: + # 其他情况,尝试转换为numpy对比 + local_np = local_output.numpy() if isinstance(local_output, paddle.Tensor) else np.array(local_output) + remote_np = remote_output.numpy() if isinstance(remote_output, paddle.Tensor) else np.array(remote_output) + np.testing.assert_allclose(local_np, remote_np, atol=self.atol, rtol=self.rtol, equal_nan=True) + + print(f"[compare] Forward accuracy check passed for {self.api_config.config}", flush=True) + except Exception as e: + print(f"[compare] Forward accuracy check failed for {self.api_config.config}, error: {e}", flush=True) + write_to_log("accuracy_error", self.api_config.config) + return False + + # 对比Backward梯度(如果存在且Forward通过) + if local_grads is not None and 'grads' in remote_data: + remote_grads = remote_data['grads'] + + try: + if isinstance(local_grads, (list, tuple)) and isinstance(remote_grads, (list, tuple)): + for i, (local_grad, remote_grad) in enumerate(zip(local_grads, remote_grads)): + if isinstance(local_grad, paddle.Tensor) and isinstance(remote_grad, paddle.Tensor): + np.testing.assert_allclose( + local_grad.numpy(), remote_grad.numpy(), + atol=self.atol, rtol=self.rtol, equal_nan=True + ) + print(f"[compare] Backward gradient[{i}] comparison passed", flush=True) + elif isinstance(local_grads, paddle.Tensor) and isinstance(remote_grads, paddle.Tensor): + np.testing.assert_allclose( + local_grads.numpy(), remote_grads.numpy(), + atol=self.atol, rtol=self.rtol, equal_nan=True + ) + + print(f"[compare] Backward gradient check passed for {self.api_config.config}", flush=True) + except Exception as e: + print(f"[compare] Backward gradient check failed for {self.api_config.config}, error: {e}", flush=True) + return False + + print(f"[compare] Accuracy check passed for {self.api_config.config}", flush=True) + write_to_log("pass", self.api_config.config) + return True + + except Exception as e: + print(f"[compare] Comparison failed for {self.api_config.config}, error: {e}", flush=True) + write_to_log("accuracy_error", self.api_config.config) + return False + + def test(self): + """Main test function""" + if self.operation_mode == "upload": + self._test_upload_mode() + elif self.operation_mode == "download": + self._test_download_mode() + else: + # 默认模式:本地直接对比 + print("[info] No operation mode specified, running in local mode") + self._test_local_mode() + + def _test_upload_mode(self): + """Upload模式:执行测试并上传结果""" + print(f"[upload] Starting upload mode for {self.api_config.config}", flush=True) + + local_device_type = self._get_local_device_type() + + if local_device_type == "gpu": + # GPU端:使用Paddle在GPU上执行 + output, grads = self._run_paddle_on_gpu() + else: + # PaddleDevice端:使用Paddle在自定义设备上执行 + output, grads = self._run_paddle_on_custom_device() + + if output is None: + print(f"[upload] Execution failed for {self.api_config.config}", flush=True) + return + + # 保存结果到本地PDTensor + local_path = self._save_tensor_locally(output, grads) + + # 异步上传到BOS + self._upload_to_bos(local_path) + + print(f"[upload] Upload mode completed for {self.api_config.config}", flush=True) + + def _test_download_mode(self): + """Download模式:下载对比数据并验证""" + print(f"[download] Starting download mode for {self.api_config.config}", flush=True) + + # 确定要下载的文件名 + target_filename = self._get_filename(self.target_device_type) + + # 下载文件 + downloaded_file = self._download_from_bos(target_filename) + if downloaded_file is None: + print(f"[download] Failed to download comparison data for {self.api_config.config}", flush=True) + return + + # 在本地设备上执行测试 + local_device_type = self._get_local_device_type() + + if local_device_type == "gpu": + # GPU端:使用Paddle在GPU上执行 + local_output, local_grads = self._run_paddle_on_gpu() + else: + # PaddleDevice端:使用Paddle在自定义设备上执行 + local_output, local_grads = self._run_paddle_on_custom_device() + + if local_output is None: + print(f"[download] Local execution failed for {self.api_config.config}", flush=True) + return + + # 与下载的结果进行对比 + success = self._compare_with_downloaded(local_output, local_grads, downloaded_file) + + # 清理下载的文件 + downloaded_file.unlink(missing_ok=True) + + print(f"[download] Download mode completed for {self.api_config.config}", flush=True) + + def _test_local_mode(self): + """默认模式:本地直接对比(暂不支持)""" + print(f"[local] Local mode not implemented yet for {self.api_config.config}", flush=True) + print("[info] Please specify --operation_mode=upload or --operation_mode=download", flush=True) From e93a4994d058134e6d8b6844c802202140632a70 Mon Sep 17 00:00:00 2001 From: ljz <470699397@qq.com> Date: Tue, 9 Dec 2025 15:40:06 +0800 Subject: [PATCH 04/13] use bos --- engineV2.py | 25 ++- tester/paddle_device_vs_gpu.py | 400 ++++++++++++++++++++------------- 2 files changed, 263 insertions(+), 162 deletions(-) diff --git a/engineV2.py b/engineV2.py index f0785939..89a1d08e 100644 --- a/engineV2.py +++ b/engineV2.py @@ -40,8 +40,17 @@ os.environ["NVIDIA_TF32_OVERRIDE"] = "0" VALID_TEST_ARGS = { - "test_amp", "test_backward", "atol", "rtol", "test_tol", - "operation_mode", "bos_path", "target_device_type", "random_seed" + "test_amp", + "test_backward", + "atol", + "rtol", + "test_tol", + "operation_mode", + "bos_path", + "target_device_type", + "random_seed", + "bos_conf_path", + "bcecmd_path", } DEVICE_TYPE = None @@ -671,6 +680,18 @@ def main(): default="", help="BOS storage path (required when operation_mode is specified)", ) + parser.add_argument( + "--bos_conf_path", + type=str, + default="./conf", + help="Path for bcecmd --conf-path when using BOS", + ) + parser.add_argument( + "--bcecmd_path", + type=str, + default="./bcecmd", + help="bcecmd binary path used for BOS upload/download", + ) parser.add_argument( "--target_device_type", type=str, diff --git a/tester/paddle_device_vs_gpu.py b/tester/paddle_device_vs_gpu.py index fc145132..a74235f0 100644 --- a/tester/paddle_device_vs_gpu.py +++ b/tester/paddle_device_vs_gpu.py @@ -1,9 +1,7 @@ import hashlib import json -import os import subprocess import tempfile -import time from pathlib import Path import numpy as np @@ -17,7 +15,7 @@ class APITestPaddleDeviceVSGPU(APITestCustomDeviceVSCPU): def __init__(self, api_config, **kwargs): # 继承 CustomDevice vs CPU 的基本功能 super().__init__(api_config, **kwargs) - + # 新增参数 self.operation_mode = kwargs.get("operation_mode", None) self.bos_path = kwargs.get("bos_path", "") @@ -25,19 +23,24 @@ def __init__(self, api_config, **kwargs): self.random_seed = kwargs.get("random_seed", 0) self.atol = kwargs.get("atol", 1e-2) self.rtol = kwargs.get("rtol", 1e-2) - + self.bcecmd_path = Path(kwargs.get("bcecmd_path", "./bcecmd")).resolve() + self.bos_conf_path = kwargs.get("bos_conf_path", "./conf") + # 设置随机种子确保一致性 if self.random_seed != 0: np.random.seed(self.random_seed) paddle.seed(self.random_seed) - + def _get_config_hash(self): """生成API配置的哈希值,用于文件名""" - config_str = json.dumps({ - "api_name": self.api_config.api_name, - "args": [str(arg) for arg in self.api_config.args], - "kwargs": {k: str(v) for k, v in self.api_config.kwargs.items()} - }, sort_keys=True) + config_str = json.dumps( + { + "api_name": self.api_config.api_name, + "args": [str(arg) for arg in self.api_config.args], + "kwargs": {k: str(v) for k, v in self.api_config.kwargs.items()}, + }, + sort_keys=True, + ) return hashlib.md5(config_str.encode()).hexdigest()[:16] def _get_local_device_type(self): @@ -64,105 +67,81 @@ def _save_tensor_locally(self, output, grads=None): """保存结果到本地PDTensor文件""" # 保存到临时文件 temp_dir = tempfile.gettempdir() - filename = self._get_filename().replace('.npz', '.pdtensor') + filename = self._get_filename().replace(".npz", ".pdtensor") local_path = Path(temp_dir) / filename - + # 使用paddle.save保存张量数据 - save_data = {'output': output} + save_data = {"output": output} if grads is not None: - save_data['grads'] = grads - + save_data["grads"] = grads + paddle.save(save_data, str(local_path)) print(f"[upload] Saved pdtensor file: {local_path}", flush=True) return local_path + def _build_bos_path(self, filename: str) -> str: + cleaned = self.bos_path.strip().lstrip("/").rstrip("/") + return f"bos:/{cleaned}/{filename}" + + def _bcecmd_cp(self, src: str, dst: str, action: str): + """使用指定的 bcecmd 命令执行 cp 操作""" + cmd = [ + str(self.bcecmd_path), + "--conf-path", + self.bos_conf_path, + "bos", + "cp", + src, + dst, + ] + print(f"[{action}] Running command: {' '.join(cmd)}", flush=True) + return subprocess.run(cmd, capture_output=True, text=True, timeout=300) + def _upload_to_bos(self, local_path): - """上传文件到指定路径,支持本地和BOS""" + """使用 bcecmd 上传文件到 BOS""" if not self.bos_path: print(f"[upload] No bos_path specified, skip upload", flush=True) return - + + remote_path = self._build_bos_path(local_path.name) try: - # 判断路径类型:本地路径还是BOS路径 - if self.bos_path.startswith("bos://"): - # BOS路径:使用bcecmd工具上传 - remote_path = f"{self.bos_path.rstrip('/')}/{local_path.name}" - print(f"[upload] Starting upload to BOS: {remote_path}", flush=True) - - cmd = ["bcecmd", "bos", "cp", str(local_path), remote_path] - result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) - - if result.returncode == 0: - print(f"[upload] Upload succeeded: {remote_path}", flush=True) - local_path.unlink(missing_ok=True) - else: - print(f"[upload] Upload failed: {remote_path}, error: {result.stderr}", flush=True) - else: - # 本地路径:直接复制文件 - local_bos_path = Path(self.bos_path).resolve() - remote_path = local_bos_path / local_path.name - - # 确保目录存在 - local_bos_path.mkdir(parents=True, exist_ok=True) - print(f"[upload] Copying file to local path: {remote_path}", flush=True) - - # 复制文件 - import shutil - shutil.copy2(local_path, remote_path) - print(f"[upload] File copied successfully: {remote_path}", flush=True) - - # 删除临时文件 + result = self._bcecmd_cp(str(local_path), remote_path, "upload") + if result.returncode == 0: + print(f"[upload] Upload succeeded: {remote_path}", flush=True) local_path.unlink(missing_ok=True) - + else: + print( + f"[upload] Upload failed: {remote_path}, stderr: {result.stderr}", + flush=True, + ) except Exception as e: print(f"[upload] Upload failed: {e}", flush=True) def _download_from_bos(self, filename): - """从指定路径下载文件,支持本地和BOS""" + """使用 bcecmd 从 BOS 下载文件""" if not self.bos_path: print(f"[download] No bos_path specified, skip download", flush=True) return None - + temp_dir = tempfile.gettempdir() local_path = Path(temp_dir) / filename - + if local_path.exists(): print(f"[download] File already exists locally: {local_path}", flush=True) return local_path + remote_path = self._build_bos_path(filename) try: - # 判断路径类型:本地路径还是BOS路径 - if self.bos_path.startswith("bos://"): - # BOS路径:使用bcecmd工具下载 - remote_path = f"{self.bos_path.rstrip('/')}/{filename}" - print(f"[download] Starting download from BOS: {remote_path}", flush=True) - - cmd = ["bcecmd", "bos", "cp", remote_path, str(local_path)] - result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) - - if result.returncode == 0: - print(f"[download] Download succeeded: {local_path}", flush=True) - return local_path - else: - print(f"[download] Download failed: {remote_path}, error: {result.stderr}", flush=True) - return None - else: - # 本地路径:直接复制文件 - local_bos_path = Path(self.bos_path).resolve() - remote_path = local_bos_path / filename - - print(f"[download] Copying file from local path: {remote_path}", flush=True) - - if not remote_path.exists(): - print(f"[download] File not found: {remote_path}", flush=True) - return None - - # 复制文件 - import shutil - shutil.copy2(remote_path, local_path) - print(f"[download] File copied successfully: {local_path}", flush=True) + result = self._bcecmd_cp(remote_path, str(local_path), "download") + if result.returncode == 0: + print(f"[download] Download succeeded: {local_path}", flush=True) return local_path - + else: + print( + f"[download] Download failed: {remote_path}, stderr: {result.stderr}", + flush=True, + ) + return None except Exception as e: print(f"[download] Download failed: {e}", flush=True) return None @@ -172,7 +151,7 @@ def _run_paddle_on_gpu(self): try: # 设置GPU设备 paddle.set_device("gpu:0") - + # 解析Paddle API信息 if not self.ana_paddle_api_info(): print("ana_paddle_api_info failed", flush=True) @@ -188,23 +167,27 @@ def _run_paddle_on_gpu(self): return None, None # 执行Forward - paddle_output = self.paddle_api(*tuple(self.paddle_args), **self.paddle_kwargs) - + paddle_output = self.paddle_api( + *tuple(self.paddle_args), **self.paddle_kwargs + ) + # 执行Backward(如果需要) paddle_grads = None if self.need_check_grad(): inputs_list = self.get_paddle_input_list() - result_outputs, result_outputs_grads = self.gen_paddle_output_and_output_grad(paddle_output) + result_outputs, result_outputs_grads = ( + self.gen_paddle_output_and_output_grad(paddle_output) + ) if inputs_list and result_outputs and result_outputs_grads: paddle_grads = paddle.grad( - outputs=result_outputs, - inputs=inputs_list, + outputs=result_outputs, + inputs=inputs_list, grad_outputs=result_outputs_grads, - allow_unused=True + allow_unused=True, ) - + return paddle_output, paddle_grads - + except Exception as e: print(f"[paddle gpu error] {self.api_config.config}: {e}", flush=True) write_to_log("paddle_error", self.api_config.config) @@ -214,7 +197,7 @@ def _run_paddle_on_custom_device(self): """在Paddle自定义设备上运行""" try: paddle_device_type = "cpu" # 默认为CPU - + # 设置自定义设备 if self.check_xpu_available(): paddle.set_device(f"xpu:{self.xpu_device_id}") @@ -241,96 +224,171 @@ def _run_paddle_on_custom_device(self): return None, None # 执行Forward - paddle_output = self.paddle_api(*tuple(self.paddle_args), **self.paddle_kwargs) + paddle_output = self.paddle_api( + *tuple(self.paddle_args), **self.paddle_kwargs + ) # 执行Backward(如果需要) paddle_grads = None if self.need_check_grad(): inputs_list = self.get_paddle_input_list() - result_outputs, result_outputs_grads = self.gen_paddle_output_and_output_grad(paddle_output) + result_outputs, result_outputs_grads = ( + self.gen_paddle_output_and_output_grad(paddle_output) + ) if inputs_list and result_outputs and result_outputs_grads: paddle_grads = paddle.grad( - outputs=result_outputs, - inputs=inputs_list, + outputs=result_outputs, + inputs=inputs_list, grad_outputs=result_outputs_grads, - allow_unused=True + allow_unused=True, ) - + return paddle_output, paddle_grads - + except Exception as e: - print(f"[paddle {paddle_device_type} error] {self.api_config.config}: {e}", flush=True) + print( + f"[paddle {paddle_device_type} error] {self.api_config.config}: {e}", + flush=True, + ) write_to_log("paddle_error", self.api_config.config) return None, None def _compare_with_downloaded(self, local_output, local_grads, downloaded_tensor): """与下载的结果进行对比""" try: - print(f"[compare] Comparing results for {self.api_config.config}", flush=True) - + print( + f"[compare] Comparing results for {self.api_config.config}", flush=True + ) + # 加载下载的数据 remote_data = paddle.load(str(downloaded_tensor)) - remote_output = remote_data['output'] - + remote_output = remote_data["output"] + # 对比Forward输出(直接使用Paddle对比) try: - if isinstance(local_output, paddle.Tensor) and isinstance(remote_output, paddle.Tensor): + if isinstance(local_output, paddle.Tensor) and isinstance( + remote_output, paddle.Tensor + ): # 使用Paddle的对比方法 np.testing.assert_allclose( - local_output.numpy(), remote_output.numpy(), - atol=self.atol, rtol=self.rtol, equal_nan=True + local_output.numpy(), + remote_output.numpy(), + atol=self.atol, + rtol=self.rtol, + equal_nan=True, ) - elif isinstance(local_output, (list, tuple)) and isinstance(remote_output, (list, tuple)): + elif isinstance(local_output, (list, tuple)) and isinstance( + remote_output, (list, tuple) + ): # 列表或元组对比 - for i, (local_item, remote_item) in enumerate(zip(local_output, remote_output)): - if isinstance(local_item, paddle.Tensor) and isinstance(remote_item, paddle.Tensor): + for i, (local_item, remote_item) in enumerate( + zip(local_output, remote_output) + ): + if isinstance(local_item, paddle.Tensor) and isinstance( + remote_item, paddle.Tensor + ): np.testing.assert_allclose( - local_item.numpy(), remote_item.numpy(), - atol=self.atol, rtol=self.rtol, equal_nan=True + local_item.numpy(), + remote_item.numpy(), + atol=self.atol, + rtol=self.rtol, + equal_nan=True, + ) + print( + f"[compare] Forward output[{i}] comparison passed", + flush=True, ) - print(f"[compare] Forward output[{i}] comparison passed", flush=True) else: # 其他情况,尝试转换为numpy对比 - local_np = local_output.numpy() if isinstance(local_output, paddle.Tensor) else np.array(local_output) - remote_np = remote_output.numpy() if isinstance(remote_output, paddle.Tensor) else np.array(remote_output) - np.testing.assert_allclose(local_np, remote_np, atol=self.atol, rtol=self.rtol, equal_nan=True) - - print(f"[compare] Forward accuracy check passed for {self.api_config.config}", flush=True) + local_np = ( + local_output.numpy() + if isinstance(local_output, paddle.Tensor) + else np.array(local_output) + ) + remote_np = ( + remote_output.numpy() + if isinstance(remote_output, paddle.Tensor) + else np.array(remote_output) + ) + np.testing.assert_allclose( + local_np, + remote_np, + atol=self.atol, + rtol=self.rtol, + equal_nan=True, + ) + + print( + f"[compare] Forward accuracy check passed for {self.api_config.config}", + flush=True, + ) except Exception as e: - print(f"[compare] Forward accuracy check failed for {self.api_config.config}, error: {e}", flush=True) + print( + f"[compare] Forward accuracy check failed for {self.api_config.config}, error: {e}", + flush=True, + ) write_to_log("accuracy_error", self.api_config.config) return False - + # 对比Backward梯度(如果存在且Forward通过) - if local_grads is not None and 'grads' in remote_data: - remote_grads = remote_data['grads'] - + if local_grads is not None and "grads" in remote_data: + remote_grads = remote_data["grads"] + try: - if isinstance(local_grads, (list, tuple)) and isinstance(remote_grads, (list, tuple)): - for i, (local_grad, remote_grad) in enumerate(zip(local_grads, remote_grads)): - if isinstance(local_grad, paddle.Tensor) and isinstance(remote_grad, paddle.Tensor): + if isinstance(local_grads, (list, tuple)) and isinstance( + remote_grads, (list, tuple) + ): + for i, (local_grad, remote_grad) in enumerate( + zip(local_grads, remote_grads) + ): + if isinstance(local_grad, paddle.Tensor) and isinstance( + remote_grad, paddle.Tensor + ): np.testing.assert_allclose( - local_grad.numpy(), remote_grad.numpy(), - atol=self.atol, rtol=self.rtol, equal_nan=True + local_grad.numpy(), + remote_grad.numpy(), + atol=self.atol, + rtol=self.rtol, + equal_nan=True, + ) + print( + f"[compare] Backward gradient[{i}] comparison passed", + flush=True, ) - print(f"[compare] Backward gradient[{i}] comparison passed", flush=True) - elif isinstance(local_grads, paddle.Tensor) and isinstance(remote_grads, paddle.Tensor): + elif isinstance(local_grads, paddle.Tensor) and isinstance( + remote_grads, paddle.Tensor + ): np.testing.assert_allclose( - local_grads.numpy(), remote_grads.numpy(), - atol=self.atol, rtol=self.rtol, equal_nan=True + local_grads.numpy(), + remote_grads.numpy(), + atol=self.atol, + rtol=self.rtol, + equal_nan=True, ) - - print(f"[compare] Backward gradient check passed for {self.api_config.config}", flush=True) + + print( + f"[compare] Backward gradient check passed for {self.api_config.config}", + flush=True, + ) except Exception as e: - print(f"[compare] Backward gradient check failed for {self.api_config.config}, error: {e}", flush=True) + print( + f"[compare] Backward gradient check failed for {self.api_config.config}, error: {e}", + flush=True, + ) return False - - print(f"[compare] Accuracy check passed for {self.api_config.config}", flush=True) + + print( + f"[compare] Accuracy check passed for {self.api_config.config}", + flush=True, + ) write_to_log("pass", self.api_config.config) return True - + except Exception as e: - print(f"[compare] Comparison failed for {self.api_config.config}, error: {e}", flush=True) + print( + f"[compare] Comparison failed for {self.api_config.config}, error: {e}", + flush=True, + ) write_to_log("accuracy_error", self.api_config.config) return False @@ -348,64 +406,86 @@ def test(self): def _test_upload_mode(self): """Upload模式:执行测试并上传结果""" print(f"[upload] Starting upload mode for {self.api_config.config}", flush=True) - + local_device_type = self._get_local_device_type() - + if local_device_type == "gpu": # GPU端:使用Paddle在GPU上执行 output, grads = self._run_paddle_on_gpu() else: # PaddleDevice端:使用Paddle在自定义设备上执行 output, grads = self._run_paddle_on_custom_device() - + if output is None: print(f"[upload] Execution failed for {self.api_config.config}", flush=True) return - + # 保存结果到本地PDTensor local_path = self._save_tensor_locally(output, grads) - + # 异步上传到BOS self._upload_to_bos(local_path) - - print(f"[upload] Upload mode completed for {self.api_config.config}", flush=True) + + print( + f"[upload] Upload mode completed for {self.api_config.config}", flush=True + ) def _test_download_mode(self): """Download模式:下载对比数据并验证""" - print(f"[download] Starting download mode for {self.api_config.config}", flush=True) - + print( + f"[download] Starting download mode for {self.api_config.config}", + flush=True, + ) + # 确定要下载的文件名 target_filename = self._get_filename(self.target_device_type) - + # 下载文件 downloaded_file = self._download_from_bos(target_filename) if downloaded_file is None: - print(f"[download] Failed to download comparison data for {self.api_config.config}", flush=True) + print( + f"[download] Failed to download comparison data for {self.api_config.config}", + flush=True, + ) return - + # 在本地设备上执行测试 local_device_type = self._get_local_device_type() - + if local_device_type == "gpu": # GPU端:使用Paddle在GPU上执行 local_output, local_grads = self._run_paddle_on_gpu() else: # PaddleDevice端:使用Paddle在自定义设备上执行 local_output, local_grads = self._run_paddle_on_custom_device() - + if local_output is None: - print(f"[download] Local execution failed for {self.api_config.config}", flush=True) + print( + f"[download] Local execution failed for {self.api_config.config}", + flush=True, + ) return - + # 与下载的结果进行对比 - success = self._compare_with_downloaded(local_output, local_grads, downloaded_file) - + success = self._compare_with_downloaded( + local_output, local_grads, downloaded_file + ) + # 清理下载的文件 downloaded_file.unlink(missing_ok=True) - - print(f"[download] Download mode completed for {self.api_config.config}", flush=True) + + print( + f"[download] Download mode completed for {self.api_config.config}", + flush=True, + ) def _test_local_mode(self): """默认模式:本地直接对比(暂不支持)""" - print(f"[local] Local mode not implemented yet for {self.api_config.config}", flush=True) - print("[info] Please specify --operation_mode=upload or --operation_mode=download", flush=True) + print( + f"[local] Local mode not implemented yet for {self.api_config.config}", + flush=True, + ) + print( + "[info] Please specify --operation_mode=upload or --operation_mode=download", + flush=True, + ) From 511871754cb8082ac8d33670ceef23324092f838 Mon Sep 17 00:00:00 2001 From: ljz <470699397@qq.com> Date: Tue, 9 Dec 2025 17:26:27 +0800 Subject: [PATCH 05/13] fix bugs in gpu --- tester/paddle_device_vs_gpu.py | 120 ++++++--------------------------- 1 file changed, 20 insertions(+), 100 deletions(-) diff --git a/tester/paddle_device_vs_gpu.py b/tester/paddle_device_vs_gpu.py index a74235f0..ea8f5a67 100644 --- a/tester/paddle_device_vs_gpu.py +++ b/tester/paddle_device_vs_gpu.py @@ -44,18 +44,9 @@ def _get_config_hash(self): return hashlib.md5(config_str.encode()).hexdigest()[:16] def _get_local_device_type(self): - """获取当前设备的类型""" - try: - if torch.cuda.is_available(): # 检查GPU是否可用 - return "gpu" - elif self.check_xpu_available(): - return "xpu" - elif self.check_custom_device_available(): - return self.custom_device_type - else: - return "cpu" - except: - return "cpu" + """获取当前设备的类型,优先复用 engineV2 的检测逻辑。""" + from engineV2 import detect_device_type + return detect_device_type() def _get_filename(self, device_type=None): """生成PDTensor文件名""" @@ -146,75 +137,27 @@ def _download_from_bos(self, filename): print(f"[download] Download failed: {e}", flush=True) return None - def _run_paddle_on_gpu(self): - """在GPU上运行Paddle实现""" - try: - # 设置GPU设备 - paddle.set_device("gpu:0") - - # 解析Paddle API信息 - if not self.ana_paddle_api_info(): - print("ana_paddle_api_info failed", flush=True) - return None, None - - # 生成输入数据 - if not self.gen_numpy_input(): - print("gen_numpy_input failed", flush=True) - return None, None - - if not self.gen_paddle_input(): - print("gen_paddle_input failed", flush=True) - return None, None - - # 执行Forward - paddle_output = self.paddle_api( - *tuple(self.paddle_args), **self.paddle_kwargs - ) - - # 执行Backward(如果需要) - paddle_grads = None - if self.need_check_grad(): - inputs_list = self.get_paddle_input_list() - result_outputs, result_outputs_grads = ( - self.gen_paddle_output_and_output_grad(paddle_output) - ) - if inputs_list and result_outputs and result_outputs_grads: - paddle_grads = paddle.grad( - outputs=result_outputs, - inputs=inputs_list, - grad_outputs=result_outputs_grads, - allow_unused=True, - ) - - return paddle_output, paddle_grads - - except Exception as e: - print(f"[paddle gpu error] {self.api_config.config}: {e}", flush=True) - write_to_log("paddle_error", self.api_config.config) - return None, None - - def _run_paddle_on_custom_device(self): - """在Paddle自定义设备上运行""" + def _run_paddle(self, device_type: str): + """在指定设备上运行 Paddle(统一 GPU / XPU / 自定义设备逻辑)。""" try: - paddle_device_type = "cpu" # 默认为CPU - - # 设置自定义设备 - if self.check_xpu_available(): + paddle_device_type = device_type + if device_type == "gpu": + # engineV2.py sets CUDA_VISIBLE_DEVICES, so paddle will use the correct GPU. + paddle.set_device("gpu") + elif device_type == "xpu": paddle.set_device(f"xpu:{self.xpu_device_id}") - paddle_device_type = "xpu" - elif self.check_custom_device_available(): + elif device_type == self.custom_device_type and self.check_custom_device_available(): paddle.set_device(f"{self.custom_device_type}:{self.custom_device_id}") - paddle_device_type = self.custom_device_type + elif device_type == "cpu": + paddle.set_device("cpu") else: print(f"[error] No custom device available", flush=True) return None, None - # 解析Paddle API信息 if not self.ana_paddle_api_info(): print("ana_paddle_api_info failed", flush=True) return None, None - # 生成输入数据 if not self.gen_numpy_input(): print("gen_numpy_input failed", flush=True) return None, None @@ -223,12 +166,10 @@ def _run_paddle_on_custom_device(self): print("gen_paddle_input failed", flush=True) return None, None - # 执行Forward paddle_output = self.paddle_api( *tuple(self.paddle_args), **self.paddle_kwargs ) - # 执行Backward(如果需要) paddle_grads = None if self.need_check_grad(): inputs_list = self.get_paddle_input_list() @@ -399,22 +340,18 @@ def test(self): elif self.operation_mode == "download": self._test_download_mode() else: - # 默认模式:本地直接对比 - print("[info] No operation mode specified, running in local mode") - self._test_local_mode() + print( + "[error] operation_mode 不能为空,请指定 --operation_mode=upload 或 download", + flush=True, + ) + return def _test_upload_mode(self): """Upload模式:执行测试并上传结果""" print(f"[upload] Starting upload mode for {self.api_config.config}", flush=True) local_device_type = self._get_local_device_type() - - if local_device_type == "gpu": - # GPU端:使用Paddle在GPU上执行 - output, grads = self._run_paddle_on_gpu() - else: - # PaddleDevice端:使用Paddle在自定义设备上执行 - output, grads = self._run_paddle_on_custom_device() + output, grads = self._run_paddle(local_device_type) if output is None: print(f"[upload] Execution failed for {self.api_config.config}", flush=True) @@ -451,13 +388,7 @@ def _test_download_mode(self): # 在本地设备上执行测试 local_device_type = self._get_local_device_type() - - if local_device_type == "gpu": - # GPU端:使用Paddle在GPU上执行 - local_output, local_grads = self._run_paddle_on_gpu() - else: - # PaddleDevice端:使用Paddle在自定义设备上执行 - local_output, local_grads = self._run_paddle_on_custom_device() + local_output, local_grads = self._run_paddle(local_device_type) if local_output is None: print( @@ -478,14 +409,3 @@ def _test_download_mode(self): f"[download] Download mode completed for {self.api_config.config}", flush=True, ) - - def _test_local_mode(self): - """默认模式:本地直接对比(暂不支持)""" - print( - f"[local] Local mode not implemented yet for {self.api_config.config}", - flush=True, - ) - print( - "[info] Please specify --operation_mode=upload or --operation_mode=download", - flush=True, - ) From c2eec21cc5845c8e034db4579f5fe2152dc8bfd9 Mon Sep 17 00:00:00 2001 From: ljz <470699397@qq.com> Date: Tue, 9 Dec 2025 18:23:59 +0800 Subject: [PATCH 06/13] iluvatar_gpu --- engineV2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/engineV2.py b/engineV2.py index 89a1d08e..8cc2aa2c 100644 --- a/engineV2.py +++ b/engineV2.py @@ -135,7 +135,7 @@ def detect_device_type() -> str: try: out = subprocess.check_output(["ixsmi"], text=True, stderr=subprocess.STDOUT) if any(re.match(r"^\|\s*\d+\s+Iluvatar", line) for line in out.splitlines()): - DEVICE_TYPE = "iluvatar" + DEVICE_TYPE = "iluvatar_gpu" DEVICE_TYPE_DETECTED = True return DEVICE_TYPE except Exception: @@ -175,7 +175,7 @@ def get_device_count() -> int: DEVICE_COUNT = len(ids) return DEVICE_COUNT - if device_type == "iluvatar": + if device_type == "iluvatar_gpu": out = subprocess.check_output(["ixsmi"], text=True, stderr=subprocess.STDOUT) ids = set() for line in out.splitlines(): @@ -214,7 +214,7 @@ def _refresh_snapshot(device_type): snapshot[dev_id] = (total_mib / 1024.0, used_mib / 1024.0) break - elif device_type == "iluvatar": + elif device_type == "iluvatar_gpu": out = subprocess.check_output(["ixsmi"], text=True, stderr=subprocess.STDOUT) lines = out.splitlines() for i, line in enumerate(lines): @@ -251,7 +251,7 @@ def get_memory_info(gpu_id): finally: pynvml.nvmlShutdown() - if device_type in ("xpu", "iluvatar"): + if device_type in ("xpu", "iluvatar_gpu"): _refresh_snapshot(device_type) if _MEM_SNAPSHOT is None or gpu_id not in _MEM_SNAPSHOT: raise RuntimeError(f"Failed to get memory info for {device_type} device {gpu_id}") From 2fa60449d6934c444c61979d5f6a720ef61b73ad Mon Sep 17 00:00:00 2001 From: ljz <470699397@qq.com> Date: Wed, 10 Dec 2025 17:40:51 +0800 Subject: [PATCH 07/13] update readme --- engineV2-README.md | 82 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/engineV2-README.md b/engineV2-README.md index 65912f11..d0687910 100644 --- a/engineV2-README.md +++ b/engineV2-README.md @@ -86,6 +86,12 @@ | `--timeout` | int | 单个测试用例执行超时秒数(默认 1800) | | `--show_runtime_status` | bool | 是否实时显示当前的测试进度(默认 True) | | `--random_seed` | int | numpy random的随机种子(默认为0,此时不会显式设置numpy random的seed) | +| `--custom_device_vs_gpu` | bool | 运行自定义设备与GPU的精度对比测试(默认 False) | +| `--operation_mode` | str | 操作模式:`upload` 或 `download`(仅在启用 `--custom_device_vs_gpu` 时有效) | +| `--bos_path` | str | BOS 存储路径(如 `xly-devops/liujingzong/`) | +| `--bos_conf_path` | str | BOS 配置文件路径(默认 `./conf`) | +| `--bcecmd_path` | str | bcecmd 命令行工具路径(默认 `./bcecmd`) | +| `--target_device_type` | str | 目标设备类型(如 `xpu`),仅在 `operation_mode=download` 时使用 | ### 示例命令 @@ -122,6 +128,82 @@ python engineV2.py --accuracy=True --api_config_file="tester/api_config/api_conf ``` 该脚本使用参数:`NUM_GPUS=-1, NUM_WORKERS_PER_GPU=-1, GPU_IDS="4,5,6,7"`,在后台运行程序,可在修改 `run.sh` 参数后使用 +### 自定义设备与 GPU 精度对比测试 + +#### 功能说明 + +`APITestPaddleDeviceVSGPU` 类支持跨设备的精度对比测试,特别适用于自定义设备(Custom Device)与 GPU 的一致性验证。该功能分为两个模式: + +- **Upload 模式**:在源设备(如 GPU)上执行测试,保存结果到本地,然后上传到 BOS 云存储 +- **Download 模式**:从 BOS 云存储下载参考数据,在目标设备(如 XPU)上执行测试,与参考数据进行精度对比 + +#### 工作流程 + +1. **Upload 模式工作流**: + - 在当前设备上执行 Paddle API 测试(可以是 GPU、XPU 或其他自定义设备) + - 保存 Forward 输出和 Backward 梯度到本地 PDTensor 文件 + - 文件名自动包含当前设备类型标识(如 `xpu-1210-xxx.pdtensor`) + - 使用 bcecmd 工具将文件上传到 BOS 云存储 + +2. **Download 模式工作流**: + - 指定 `--target_device_type` 参数,从 BOS 云存储下载该设备的参考数据 + - 在当前设备上执行相同的 Paddle API 测试 + - 对比 Forward 输出和 Backward 梯度,验证与参考设备的精度一致性 + +#### 命令示例 + +**场景 1:在 XPU 上执行测试并上传结果** +```bash +# 在 XPU 设备上执行,生成 xpu-1210-xxx.pdtensor 文件并上传到 BOS +python engineV2.py --custom_device_vs_gpu=True --operation_mode=upload \ + --bos_path="xly-devops/liujingzong/" \ + --bos_conf_path="./conf" \ + --bcecmd_path="./bcecmd" \ + --random_seed=1210 \ + --api_config_file="./test1.txt" \ + --gpu_id=7 +``` + +**场景 2:在 GPU 上下载 XPU 的参考数据并进行精度对比** +```bash +# 在 GPU 设备上执行,从 BOS 下载 XPU 的参考数据(xpu-1210-xxx.pdtensor) +# 然后在 GPU 上运行相同的测试,对比结果验证精度一致性 +python engineV2.py --custom_device_vs_gpu=True --operation_mode=download \ + --target_device_type=xpu \ + --bos_path="xly-devops/liujingzong/" \ + --bos_conf_path="./conf" \ + --bcecmd_path="./bcecmd" \ + --random_seed=1210 \ + --api_config_file="./test1.txt" \ + --gpu_id=7 +``` + +**场景 3:在 GPU 上执行测试并上传结果** +```bash +# 在 GPU 设备上执行,生成 gpu-1210-xxx.pdtensor 文件并上传到 BOS +python engineV2.py --custom_device_vs_gpu=True --operation_mode=upload \ + --bos_path="xly-devops/liujingzong/" \ + --bos_conf_path="./conf" \ + --bcecmd_path="./bcecmd" \ + --random_seed=1210 \ + --api_config_file="./test1.txt" \ + --gpu_id=7 +``` + +**场景 4:在 XPU 上下载 GPU 的参考数据并进行精度对比** +```bash +# 在 XPU 设备上执行,从 BOS 下载 GPU 的参考数据(gpu-1210-xxx.pdtensor) +# 然后在 XPU 上运行相同的测试,对比结果验证精度一致性 +python engineV2.py --custom_device_vs_gpu=True --operation_mode=download \ + --target_device_type=gpu \ + --bos_path="xly-devops/liujingzong/" \ + --bos_conf_path="./conf" \ + --bcecmd_path="./bcecmd" \ + --random_seed=1210 \ + --api_config_file="./test1.txt" \ + --gpu_id=7 +``` + ## 监控方法 执行 `run.sh` 后可通过以下方式监控: From 9be47d6eb370841a515c21fdd84ef855e0e766c7 Mon Sep 17 00:00:00 2001 From: ljz <470699397@qq.com> Date: Fri, 12 Dec 2025 14:44:45 +0800 Subject: [PATCH 08/13] =?UTF-8?q?=E5=87=8F=E5=B0=91=E5=8F=82=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- engineV2-README.md | 54 +++++++------- engineV2.py | 126 +++++++++++++++++++++------------ tester/bos_config.yaml | 12 ++++ tester/paddle_device_vs_gpu.py | 2 +- 4 files changed, 121 insertions(+), 73 deletions(-) create mode 100644 tester/bos_config.yaml diff --git a/engineV2-README.md b/engineV2-README.md index aa6cf6cb..5709503e 100644 --- a/engineV2-README.md +++ b/engineV2-README.md @@ -86,12 +86,8 @@ | `--timeout` | int | 单个测试用例执行超时秒数(默认 1800) | | `--show_runtime_status` | bool | 是否实时显示当前的测试进度(默认 True) | | `--random_seed` | int | numpy random的随机种子(默认为0,此时不会显式设置numpy random的seed) | -| `--custom_device_vs_gpu` | bool | 运行自定义设备与GPU的精度对比测试(默认 False) | -| `--operation_mode` | str | 操作模式:`upload` 或 `download`(仅在启用 `--custom_device_vs_gpu` 时有效) | -| `--bos_path` | str | BOS 存储路径(如 `xly-devops/liujingzong/`) | -| `--bos_conf_path` | str | BOS 配置文件路径(默认 `./conf`) | -| `--bcecmd_path` | str | bcecmd 命令行工具路径(默认 `./bcecmd`) | -| `--target_device_type` | str | 目标设备类型(如 `xpu`),仅在 `operation_mode=download` 时使用 | +| `--custom_device_vs_gpu` | str | 运行自定义设备与GPU的精度对比测试:`upload` 或 `download`(默认 None) | +| `--target_device_type` | str | 目标设备类型(如 `xpu`),仅在 `--custom_device_vs_gpu=download` 时使用 | | `--bitwise_alignment` | bool | 是否进行诸位对齐对比,开启后所有的api的精度对比都按照atol=0.0,rtol = 0.0的精度对比结果| @@ -151,58 +147,64 @@ python engineV2.py --accuracy=True --api_config_file="tester/api_config/api_conf - 在当前设备上执行相同的 Paddle API 测试 - 对比 Forward 输出和 Backward 梯度,验证与参考设备的精度一致性 +#### 配置文件设置 + +首先,编辑 `tester/bos_config.yaml` 配置文件: + +```yaml +# BOS 配置文件 +# 用于自定义设备与 GPU 精度对比测试的云存储配置 + +# BOS 存储路径(如:xly-devops/liujingzong/) +bos_path: "xly-devops/liujingzong/" + +# BOS 配置文件路径(bcecmd 使用的配置文件路径) +bos_conf_path: "./conf" + +# bcecmd 命令行工具路径 +bcecmd_path: "./bcecmd" +``` + #### 命令示例 **场景 1:在 XPU 上执行测试并上传结果** ```bash # 在 XPU 设备上执行,生成 xpu-1210-xxx.pdtensor 文件并上传到 BOS -python engineV2.py --custom_device_vs_gpu=True --operation_mode=upload \ - --bos_path="xly-devops/liujingzong/" \ - --bos_conf_path="./conf" \ - --bcecmd_path="./bcecmd" \ +python engineV2.py --custom_device_vs_gpu=upload \ --random_seed=1210 \ --api_config_file="./test1.txt" \ - --gpu_id=7 + --gpu_ids=7 ``` **场景 2:在 GPU 上下载 XPU 的参考数据并进行精度对比** ```bash # 在 GPU 设备上执行,从 BOS 下载 XPU 的参考数据(xpu-1210-xxx.pdtensor) # 然后在 GPU 上运行相同的测试,对比结果验证精度一致性 -python engineV2.py --custom_device_vs_gpu=True --operation_mode=download \ +python engineV2.py --custom_device_vs_gpu=download \ --target_device_type=xpu \ - --bos_path="xly-devops/liujingzong/" \ - --bos_conf_path="./conf" \ - --bcecmd_path="./bcecmd" \ --random_seed=1210 \ --api_config_file="./test1.txt" \ - --gpu_id=7 + --gpu_ids=7 ``` **场景 3:在 GPU 上执行测试并上传结果** ```bash # 在 GPU 设备上执行,生成 gpu-1210-xxx.pdtensor 文件并上传到 BOS -python engineV2.py --custom_device_vs_gpu=True --operation_mode=upload \ - --bos_path="xly-devops/liujingzong/" \ - --bos_conf_path="./conf" \ - --bcecmd_path="./bcecmd" \ +python engineV2.py --custom_device_vs_gpu=upload \ --random_seed=1210 \ --api_config_file="./test1.txt" \ - --gpu_id=7 + --gpu_ids=7 ``` **场景 4:在 XPU 上下载 GPU 的参考数据并进行精度对比** ```bash # 在 XPU 设备上执行,从 BOS 下载 GPU 的参考数据(gpu-1210-xxx.pdtensor) # 然后在 XPU 上运行相同的测试,对比结果验证精度一致性 -python engineV2.py --custom_device_vs_gpu=True --operation_mode=download \ +python engineV2.py --custom_device_vs_gpu=download \ --target_device_type=gpu \ - --bos_path="xly-devops/liujingzong/" \ - --bos_conf_path="./conf" \ - --bcecmd_path="./bcecmd" \ --random_seed=1210 \ --api_config_file="./test1.txt" \ - --gpu_id=7 + --gpu_ids=7 ``` ## 监控方法 diff --git a/engineV2.py b/engineV2.py index 116d2299..ebdeb869 100644 --- a/engineV2.py +++ b/engineV2.py @@ -12,10 +12,12 @@ from concurrent.futures import TimeoutError, as_completed from datetime import datetime from multiprocessing import Lock, Manager, cpu_count, set_start_method +from pathlib import Path from typing import TYPE_CHECKING import numpy as np import pynvml +import yaml from pebble import ProcessExpired, ProcessPool if TYPE_CHECKING: @@ -481,12 +483,16 @@ def run_test_case(api_config_str, options): "paddle_torch_gpu_performance": APITestPaddleTorchGPUPerformance, "accuracy_stable": APITestAccuracyStable, "paddle_custom_device": APITestCustomDeviceVSCPU, - "custom_device_vs_gpu": APITestPaddleDeviceVSGPU, } - test_class = next( - (cls for opt, cls in option_to_class.items() if getattr(options, opt, False)), - APITestAccuracy, # default fallback - ) + + # 处理 custom_device_vs_gpu 模式 + if options.custom_device_vs_gpu: + test_class = APITestPaddleDeviceVSGPU + else: + test_class = next( + (cls for opt, cls in option_to_class.items() if getattr(options, opt, False)), + APITestAccuracy, # default fallback + ) kwargs = {k: v for k, v in vars(options).items() if k in VALID_TEST_ARGS} case = test_class(api_config, **kwargs) try: @@ -664,33 +670,10 @@ def main(): ) parser.add_argument( "--custom_device_vs_gpu", - type=parse_bool, - default=False, - help="test paddle api on custom device vs GPU", - ) - parser.add_argument( - "--operation_mode", type=str, choices=["upload", "download"], - help="Operation mode: upload or download", - ) - parser.add_argument( - "--bos_path", - type=str, - default="", - help="BOS storage path (required when operation_mode is specified)", - ) - parser.add_argument( - "--bos_conf_path", - type=str, - default="./conf", - help="Path for bcecmd --conf-path when using BOS", - ) - parser.add_argument( - "--bcecmd_path", - type=str, - default="./bcecmd", - help="bcecmd binary path used for BOS upload/download", + default=None, + help="test paddle api on custom device vs GPU: 'upload' or 'download'", ) parser.add_argument( "--target_device_type", @@ -719,7 +702,7 @@ def main(): options.paddle_torch_gpu_performance, options.accuracy_stable, options.paddle_custom_device, - options.custom_device_vs_gpu, + options.custom_device_vs_gpu is not None, ] if len([m for m in mode if m is True]) != 1: print( @@ -732,16 +715,46 @@ def main(): "--paddle_torch_gpu_performance" "--accuracy_stable" "--paddle_custom_device" - "--custom_device_vs_gpu" - " to True.", + "--custom_device_vs_gpu=upload or --custom_device_vs_gpu=download", flush=True, ) return + + # 处理 custom_device_vs_gpu 模式的配置 + bos_config_data = None if options.custom_device_vs_gpu: - if options.operation_mode and not options.bos_path: - print("--bos_path is required when --operation_mode is specified", flush=True) + # 读取 BOS 配置文件(固定路径:tester/bos_config.yaml) + bos_config_path = Path("tester/bos_config.yaml") + if not bos_config_path.exists(): + print(f"BOS config file not found: {bos_config_path}", flush=True) + return + + try: + with open(bos_config_path, "r", encoding="utf-8") as f: + bos_config_data = yaml.safe_load(f) + + if not bos_config_data: + print(f"BOS config file is empty: {bos_config_path}", flush=True) + return + + # 验证必需的配置项 + required_keys = ["bos_path", "bos_conf_path", "bcecmd_path"] + missing_keys = [key for key in required_keys if key not in bos_config_data] + if missing_keys: + print(f"Missing required keys in BOS config: {missing_keys}", flush=True) + return + + # 将配置添加到 options 中,以便传递给测试类 + options.operation_mode = options.custom_device_vs_gpu + options.bos_path = bos_config_data["bos_path"] + options.bos_conf_path = bos_config_data["bos_conf_path"] + options.bcecmd_path = bos_config_data["bcecmd_path"] + + except Exception as e: + print(f"Failed to load BOS config file {bos_config_path}: {e}", flush=True) return - if options.operation_mode == "download" and not options.target_device_type: + + if options.custom_device_vs_gpu == "download" and not options.target_device_type: print("--target_device_type is required in download mode", flush=True) return if options.test_tol and not options.accuracy: @@ -761,7 +774,8 @@ def main(): APITestCINNVSDygraph, APITestPaddleGPUPerformance, APITestPaddleOnly, APITestPaddleTorchGPUPerformance, - APITestTorchGPUPerformance) + APITestTorchGPUPerformance, + APITestCustomDeviceVSCPU) # set log_writer set_engineV2() @@ -784,16 +798,36 @@ def main(): "accuracy_stable": APITestAccuracyStable, "paddle_custom_device": APITestCustomDeviceVSCPU, } - test_class = next( - ( - cls - for opt, cls in option_to_class.items() - if getattr(options, opt, False) - ), - APITestAccuracy, # default fallback - ) + + # 处理 custom_device_vs_gpu 模式 + if options.custom_device_vs_gpu: + from tester import APITestPaddleDeviceVSGPU + test_class = APITestPaddleDeviceVSGPU + else: + test_class = next( + ( + cls + for opt, cls in option_to_class.items() + if getattr(options, opt, False) + ), + APITestAccuracy, # default fallback + ) - if options.accuracy: + if options.custom_device_vs_gpu: + # custom_device_vs_gpu 模式需要传递额外参数 + kwargs = { + "operation_mode": options.operation_mode, + "bos_path": options.bos_path, + "bos_conf_path": options.bos_conf_path, + "bcecmd_path": options.bcecmd_path, + "random_seed": options.random_seed, + "atol": options.atol, + "rtol": options.rtol, + } + if options.target_device_type: + kwargs["target_device_type"] = options.target_device_type + case = test_class(api_config, **kwargs) + elif options.accuracy: case = test_class( api_config, test_amp=options.test_amp, diff --git a/tester/bos_config.yaml b/tester/bos_config.yaml new file mode 100644 index 00000000..a981d9c9 --- /dev/null +++ b/tester/bos_config.yaml @@ -0,0 +1,12 @@ +# BOS 配置文件 +# 用于自定义设备与 GPU 精度对比测试的云存储配置 + +# BOS 存储路径(如:xly-devops/liujingzong/) +bos_path: "xly-devops/liujingzong/" + +# BOS 配置文件路径(bcecmd 使用的配置文件路径) +bos_conf_path: "./conf" + +# bcecmd 命令行工具路径 +bcecmd_path: "./bcecmd" + diff --git a/tester/paddle_device_vs_gpu.py b/tester/paddle_device_vs_gpu.py index ea8f5a67..9079cbbb 100644 --- a/tester/paddle_device_vs_gpu.py +++ b/tester/paddle_device_vs_gpu.py @@ -58,7 +58,7 @@ def _save_tensor_locally(self, output, grads=None): """保存结果到本地PDTensor文件""" # 保存到临时文件 temp_dir = tempfile.gettempdir() - filename = self._get_filename().replace(".npz", ".pdtensor") + filename = self._get_filename() local_path = Path(temp_dir) / filename # 使用paddle.save保存张量数据 From 81d25235af546095c99a37e23a83b023e696d850 Mon Sep 17 00:00:00 2001 From: ljz <470699397@qq.com> Date: Tue, 16 Dec 2025 10:25:05 +0800 Subject: [PATCH 09/13] rm target-device --- engineV2-README.md | 52 +++++++++------------------------- engineV2.py | 12 -------- tester/paddle_device_vs_gpu.py | 13 ++++----- 3 files changed, 19 insertions(+), 58 deletions(-) diff --git a/engineV2-README.md b/engineV2-README.md index 5709503e..5cd8e8ab 100644 --- a/engineV2-README.md +++ b/engineV2-README.md @@ -87,7 +87,6 @@ | `--show_runtime_status` | bool | 是否实时显示当前的测试进度(默认 True) | | `--random_seed` | int | numpy random的随机种子(默认为0,此时不会显式设置numpy random的seed) | | `--custom_device_vs_gpu` | str | 运行自定义设备与GPU的精度对比测试:`upload` 或 `download`(默认 None) | -| `--target_device_type` | str | 目标设备类型(如 `xpu`),仅在 `--custom_device_vs_gpu=download` 时使用 | | `--bitwise_alignment` | bool | 是否进行诸位对齐对比,开启后所有的api的精度对比都按照atol=0.0,rtol = 0.0的精度对比结果| @@ -129,23 +128,24 @@ python engineV2.py --accuracy=True --api_config_file="tester/api_config/api_conf #### 功能说明 -`APITestPaddleDeviceVSGPU` 类支持跨设备的精度对比测试,特别适用于自定义设备(Custom Device)与 GPU 的一致性验证。该功能分为两个模式: +`APITestPaddleDeviceVSGPU` 类支持跨设备的精度对比测试,目前主要面向 **GPU 上传 + XPU(或其他设备)下载对比** 这一典型场景。该功能分为两个模式: -- **Upload 模式**:在源设备(如 GPU)上执行测试,保存结果到本地,然后上传到 BOS 云存储 -- **Download 模式**:从 BOS 云存储下载参考数据,在目标设备(如 XPU)上执行测试,与参考数据进行精度对比 +- **Upload 模式(GPU 侧)**:在 GPU 上执行测试,保存结果到本地,然后上传到 BOS 云存储 +- **Download 模式(XPU/其他设备侧)**:在 XPU 或其他设备上执行测试,从 BOS 下载 GPU 侧的参考数据进行精度对比 #### 工作流程 -1. **Upload 模式工作流**: - - 在当前设备上执行 Paddle API 测试(可以是 GPU、XPU 或其他自定义设备) +1. **Upload 模式工作流(GPU 侧)**: + - 在 GPU 设备上执行 Paddle API 测试 - 保存 Forward 输出和 Backward 梯度到本地 PDTensor 文件 - - 文件名自动包含当前设备类型标识(如 `xpu-1210-xxx.pdtensor`) + - 文件名依赖随机种子与配置哈希(如 `1210-xxx.pdtensor`) - 使用 bcecmd 工具将文件上传到 BOS 云存储 -2. **Download 模式工作流**: - - 指定 `--target_device_type` 参数,从 BOS 云存储下载该设备的参考数据 - - 在当前设备上执行相同的 Paddle API 测试 - - 对比 Forward 输出和 Backward 梯度,验证与参考设备的精度一致性 +2. **Download 模式工作流(XPU/其他设备侧)**: + - 在 XPU 或其他设备上执行相同的 Paddle API 测试 + - 使用与 GPU 侧上传时一致的随机种子和配置,构造同名 PDTensor 文件名 + - 从 BOS 云存储下载对应的 GPU 参考数据 + - 对比 Forward 输出和 Backward 梯度,验证与 GPU 的精度一致性 #### 配置文件设置 @@ -166,42 +166,18 @@ bcecmd_path: "./bcecmd" ``` #### 命令示例 - -**场景 1:在 XPU 上执行测试并上传结果** -```bash -# 在 XPU 设备上执行,生成 xpu-1210-xxx.pdtensor 文件并上传到 BOS -python engineV2.py --custom_device_vs_gpu=upload \ - --random_seed=1210 \ - --api_config_file="./test1.txt" \ - --gpu_ids=7 -``` - -**场景 2:在 GPU 上下载 XPU 的参考数据并进行精度对比** -```bash -# 在 GPU 设备上执行,从 BOS 下载 XPU 的参考数据(xpu-1210-xxx.pdtensor) -# 然后在 GPU 上运行相同的测试,对比结果验证精度一致性 -python engineV2.py --custom_device_vs_gpu=download \ - --target_device_type=xpu \ - --random_seed=1210 \ - --api_config_file="./test1.txt" \ - --gpu_ids=7 -``` - -**场景 3:在 GPU 上执行测试并上传结果** +**在 GPU 上执行测试并上传结果** ```bash -# 在 GPU 设备上执行,生成 gpu-1210-xxx.pdtensor 文件并上传到 BOS +# 在 GPU 设备上执行,生成1210-xxx.pdtensor 文件并上传到 BOS python engineV2.py --custom_device_vs_gpu=upload \ --random_seed=1210 \ --api_config_file="./test1.txt" \ --gpu_ids=7 ``` -**场景 4:在 XPU 上下载 GPU 的参考数据并进行精度对比** +**在 XPU 上下载 GPU 的参考数据并进行精度对比** ```bash -# 在 XPU 设备上执行,从 BOS 下载 GPU 的参考数据(gpu-1210-xxx.pdtensor) -# 然后在 XPU 上运行相同的测试,对比结果验证精度一致性 python engineV2.py --custom_device_vs_gpu=download \ - --target_device_type=gpu \ --random_seed=1210 \ --api_config_file="./test1.txt" \ --gpu_ids=7 diff --git a/engineV2.py b/engineV2.py index ebdeb869..2b6a7a2d 100644 --- a/engineV2.py +++ b/engineV2.py @@ -49,7 +49,6 @@ "test_tol", "operation_mode", "bos_path", - "target_device_type", "random_seed", "bos_conf_path", "bcecmd_path", @@ -675,12 +674,6 @@ def main(): default=None, help="test paddle api on custom device vs GPU: 'upload' or 'download'", ) - parser.add_argument( - "--target_device_type", - type=str, - choices=["gpu", "xpu", "iluvatar_gpu"], - help="Target device type for download mode", - ) parser.add_argument( "--bitwise_alignment", type=bool, @@ -754,9 +747,6 @@ def main(): print(f"Failed to load BOS config file {bos_config_path}: {e}", flush=True) return - if options.custom_device_vs_gpu == "download" and not options.target_device_type: - print("--target_device_type is required in download mode", flush=True) - return if options.test_tol and not options.accuracy: print(f"--test_tol takes effect when --accuracy is True.", flush=True) if options.test_backward and not options.paddle_cinn: @@ -824,8 +814,6 @@ def main(): "atol": options.atol, "rtol": options.rtol, } - if options.target_device_type: - kwargs["target_device_type"] = options.target_device_type case = test_class(api_config, **kwargs) elif options.accuracy: case = test_class( diff --git a/tester/paddle_device_vs_gpu.py b/tester/paddle_device_vs_gpu.py index 9079cbbb..de20e451 100644 --- a/tester/paddle_device_vs_gpu.py +++ b/tester/paddle_device_vs_gpu.py @@ -19,7 +19,6 @@ def __init__(self, api_config, **kwargs): # 新增参数 self.operation_mode = kwargs.get("operation_mode", None) self.bos_path = kwargs.get("bos_path", "") - self.target_device_type = kwargs.get("target_device_type", "") self.random_seed = kwargs.get("random_seed", 0) self.atol = kwargs.get("atol", 1e-2) self.rtol = kwargs.get("rtol", 1e-2) @@ -48,11 +47,9 @@ def _get_local_device_type(self): from engineV2 import detect_device_type return detect_device_type() - def _get_filename(self, device_type=None): - """生成PDTensor文件名""" - if device_type is None: - device_type = self._get_local_device_type() - return f"{device_type}-{self.random_seed}-{self._get_config_hash()}.pdtensor" + def _get_filename(self): + """生成PDTensor文件名(不再包含设备前缀,只依赖随机种子和配置哈希)""" + return f"{self.random_seed}-{self._get_config_hash()}.pdtensor" def _save_tensor_locally(self, output, grads=None): """保存结果到本地PDTensor文件""" @@ -374,8 +371,8 @@ def _test_download_mode(self): flush=True, ) - # 确定要下载的文件名 - target_filename = self._get_filename(self.target_device_type) + # 确定要下载的文件名(与 GPU 上传时保持一致) + target_filename = self._get_filename() # 下载文件 downloaded_file = self._download_from_bos(target_filename) From b4ff8949819720d542bd37b122bee1cf2fee1c50 Mon Sep 17 00:00:00 2001 From: Jingzong Liu <470699397@qq.com> Date: Tue, 16 Dec 2025 19:16:24 +0800 Subject: [PATCH 10/13] Remove duplicate APITest cases from engineV2.py Removed duplicate test cases from the test suite. --- engineV2.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/engineV2.py b/engineV2.py index cb5c82ea..c216d3aa 100644 --- a/engineV2.py +++ b/engineV2.py @@ -33,8 +33,6 @@ APITestPaddleGPUPerformance, APITestPaddleOnly, APITestPaddleTorchGPUPerformance, - APITestAccuracyStable, - APITestCustomDeviceVSCPU, APITestPaddleDeviceVSGPU, APITestTorchGPUPerformance, ) From cee4c1505e6415295b7e3263be37d01632f6762e Mon Sep 17 00:00:00 2001 From: Jingzong Liu <470699397@qq.com> Date: Tue, 16 Dec 2025 19:28:52 +0800 Subject: [PATCH 11/13] Fix formatting of APITestPaddleDeviceVSGPU entry --- engineV2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engineV2.py b/engineV2.py index c216d3aa..d9ac1209 100644 --- a/engineV2.py +++ b/engineV2.py @@ -393,7 +393,7 @@ def pid_exists(pid): APITestAccuracyStable, APITestCINNVSDygraph, APITestCustomDeviceVSCPU, - APITestPaddleDeviceVSGPU + APITestPaddleDeviceVSGPU, APITestPaddleGPUPerformance, APITestPaddleOnly, APITestPaddleTorchGPUPerformance, @@ -410,7 +410,7 @@ def pid_exists(pid): "APITestPaddleTorchGPUPerformance": APITestPaddleTorchGPUPerformance, "APITestAccuracyStable": APITestAccuracyStable, "APITestCustomDeviceVSCPU": APITestCustomDeviceVSCPU, - "APITestPaddleDeviceVSGPU": APITestPaddleDeviceVSGPU + "APITestPaddleDeviceVSGPU": APITestPaddleDeviceVSGPU, } globals().update(test_classes) From cfd88c03a695a6c539f985a04369be98602a60d7 Mon Sep 17 00:00:00 2001 From: ljz <470699397@qq.com> Date: Tue, 16 Dec 2025 20:26:51 +0800 Subject: [PATCH 12/13] fmt --- engineV2.py | 23 ++++++++++++----------- tester/__init__.py | 7 ++++--- tester/paddle_device_vs_gpu.py | 27 ++++++++++----------------- 3 files changed, 26 insertions(+), 31 deletions(-) diff --git a/engineV2.py b/engineV2.py index d9ac1209..bb7b8a74 100644 --- a/engineV2.py +++ b/engineV2.py @@ -30,10 +30,10 @@ APITestAccuracyStable, APITestCINNVSDygraph, APITestCustomDeviceVSCPU, + APITestPaddleDeviceVSGPU, APITestPaddleGPUPerformance, APITestPaddleOnly, APITestPaddleTorchGPUPerformance, - APITestPaddleDeviceVSGPU, APITestTorchGPUPerformance, ) @@ -480,7 +480,7 @@ def run_test_case(api_config_str, options): "accuracy_stable": APITestAccuracyStable, "paddle_custom_device": APITestCustomDeviceVSCPU, } - + # 处理 custom_device_vs_gpu 模式 if options.custom_device_vs_gpu: test_class = APITestPaddleDeviceVSGPU @@ -709,7 +709,7 @@ def main(): flush=True, ) return - + # 处理 custom_device_vs_gpu 模式的配置 bos_config_data = None if options.custom_device_vs_gpu: @@ -718,32 +718,32 @@ def main(): if not bos_config_path.exists(): print(f"BOS config file not found: {bos_config_path}", flush=True) return - + try: - with open(bos_config_path, "r", encoding="utf-8") as f: + with open(bos_config_path, encoding="utf-8") as f: bos_config_data = yaml.safe_load(f) - + if not bos_config_data: print(f"BOS config file is empty: {bos_config_path}", flush=True) return - + # 验证必需的配置项 required_keys = ["bos_path", "bos_conf_path", "bcecmd_path"] missing_keys = [key for key in required_keys if key not in bos_config_data] if missing_keys: print(f"Missing required keys in BOS config: {missing_keys}", flush=True) return - + # 将配置添加到 options 中,以便传递给测试类 options.operation_mode = options.custom_device_vs_gpu options.bos_path = bos_config_data["bos_path"] options.bos_conf_path = bos_config_data["bos_conf_path"] options.bcecmd_path = bos_config_data["bcecmd_path"] - + except Exception as e: print(f"Failed to load BOS config file {bos_config_path}: {e}", flush=True) return - + if options.test_tol and not options.accuracy: print("--test_tol takes effect when --accuracy is True.", flush=True) if options.test_backward and not options.paddle_cinn: @@ -789,10 +789,11 @@ def main(): "accuracy_stable": APITestAccuracyStable, "paddle_custom_device": APITestCustomDeviceVSCPU, } - + # 处理 custom_device_vs_gpu 模式 if options.custom_device_vs_gpu: from tester import APITestPaddleDeviceVSGPU + test_class = APITestPaddleDeviceVSGPU else: test_class = next( diff --git a/tester/__init__.py b/tester/__init__.py index e119b4f6..e721c402 100644 --- a/tester/__init__.py +++ b/tester/__init__.py @@ -27,7 +27,6 @@ from . import paddle_to_torch from .accuracy import APITestAccuracy from .accuracy_stable import APITestAccuracyStable - from .paddle_device_vs_gpu import APITestPaddleDeviceVSGPU from .api_config import ( USE_CACHED_NUMPY, APIConfig, @@ -40,6 +39,7 @@ from .base import APITestBase from .paddle_cinn_vs_dygraph import APITestCINNVSDygraph from .paddle_device_vs_cpu import APITestCustomDeviceVSCPU + from .paddle_device_vs_gpu import APITestPaddleDeviceVSGPU from .paddle_gpu_performance import APITestPaddleGPUPerformance from .paddle_only import APITestPaddleOnly from .paddle_torch_gpu_performance import APITestPaddleTorchGPUPerformance @@ -86,10 +86,11 @@ def __getattr__(name: str) -> Any: from .paddle_device_vs_cpu import APITestCustomDeviceVSCPU return APITestCustomDeviceVSCPU - elif name == 'APITestPaddleDeviceVSGPU': + elif name == "APITestPaddleDeviceVSGPU": from .paddle_device_vs_gpu import APITestPaddleDeviceVSGPU + return APITestPaddleDeviceVSGPU - elif name == 'paddle_to_torch': + elif name == "paddle_to_torch": from . import paddle_to_torch return paddle_to_torch diff --git a/tester/paddle_device_vs_gpu.py b/tester/paddle_device_vs_gpu.py index de20e451..3694e163 100644 --- a/tester/paddle_device_vs_gpu.py +++ b/tester/paddle_device_vs_gpu.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import hashlib import json import subprocess @@ -45,6 +47,7 @@ def _get_config_hash(self): def _get_local_device_type(self): """获取当前设备的类型,优先复用 engineV2 的检测逻辑。""" from engineV2 import detect_device_type + return detect_device_type() def _get_filename(self): @@ -163,15 +166,13 @@ def _run_paddle(self, device_type: str): print("gen_paddle_input failed", flush=True) return None, None - paddle_output = self.paddle_api( - *tuple(self.paddle_args), **self.paddle_kwargs - ) + paddle_output = self.paddle_api(*tuple(self.paddle_args), **self.paddle_kwargs) paddle_grads = None if self.need_check_grad(): inputs_list = self.get_paddle_input_list() - result_outputs, result_outputs_grads = ( - self.gen_paddle_output_and_output_grad(paddle_output) + result_outputs, result_outputs_grads = self.gen_paddle_output_and_output_grad( + paddle_output ) if inputs_list and result_outputs and result_outputs_grads: paddle_grads = paddle.grad( @@ -194,9 +195,7 @@ def _run_paddle(self, device_type: str): def _compare_with_downloaded(self, local_output, local_grads, downloaded_tensor): """与下载的结果进行对比""" try: - print( - f"[compare] Comparing results for {self.api_config.config}", flush=True - ) + print(f"[compare] Comparing results for {self.api_config.config}", flush=True) # 加载下载的数据 remote_data = paddle.load(str(downloaded_tensor)) @@ -219,9 +218,7 @@ def _compare_with_downloaded(self, local_output, local_grads, downloaded_tensor) remote_output, (list, tuple) ): # 列表或元组对比 - for i, (local_item, remote_item) in enumerate( - zip(local_output, remote_output) - ): + for i, (local_item, remote_item) in enumerate(zip(local_output, remote_output)): if isinstance(local_item, paddle.Tensor) and isinstance( remote_item, paddle.Tensor ): @@ -360,9 +357,7 @@ def _test_upload_mode(self): # 异步上传到BOS self._upload_to_bos(local_path) - print( - f"[upload] Upload mode completed for {self.api_config.config}", flush=True - ) + print(f"[upload] Upload mode completed for {self.api_config.config}", flush=True) def _test_download_mode(self): """Download模式:下载对比数据并验证""" @@ -395,9 +390,7 @@ def _test_download_mode(self): return # 与下载的结果进行对比 - success = self._compare_with_downloaded( - local_output, local_grads, downloaded_file - ) + success = self._compare_with_downloaded(local_output, local_grads, downloaded_file) # 清理下载的文件 downloaded_file.unlink(missing_ok=True) From 9f50c16fe45c40a6a76f861e4a6f154dd46343f5 Mon Sep 17 00:00:00 2001 From: ljz <470699397@qq.com> Date: Tue, 16 Dec 2025 22:01:04 +0800 Subject: [PATCH 13/13] option mapping --- engineV2-README.md | 9 ++++--- engineV2.py | 66 +++++++++++++++++++++++----------------------- 2 files changed, 39 insertions(+), 36 deletions(-) diff --git a/engineV2-README.md b/engineV2-README.md index 5cd8e8ab..df2ab557 100644 --- a/engineV2-README.md +++ b/engineV2-README.md @@ -86,7 +86,8 @@ | `--timeout` | int | 单个测试用例执行超时秒数(默认 1800) | | `--show_runtime_status` | bool | 是否实时显示当前的测试进度(默认 True) | | `--random_seed` | int | numpy random的随机种子(默认为0,此时不会显式设置numpy random的seed) | -| `--custom_device_vs_gpu` | str | 运行自定义设备与GPU的精度对比测试:`upload` 或 `download`(默认 None) | +| `--custom_device_vs_gpu` | bool | 启用自定义设备与GPU的精度对比测试模式(默认 False) | +| `--custom_device_vs_gpu_mode` | str | 自定义设备与GPU对比的模式:`upload` 或 `download`(默认 `upload`) | | `--bitwise_alignment` | bool | 是否进行诸位对齐对比,开启后所有的api的精度对比都按照atol=0.0,rtol = 0.0的精度对比结果| @@ -169,7 +170,8 @@ bcecmd_path: "./bcecmd" **在 GPU 上执行测试并上传结果** ```bash # 在 GPU 设备上执行,生成1210-xxx.pdtensor 文件并上传到 BOS -python engineV2.py --custom_device_vs_gpu=upload \ +python engineV2.py --custom_device_vs_gpu=True \ + --custom_device_vs_gpu_mode=upload \ --random_seed=1210 \ --api_config_file="./test1.txt" \ --gpu_ids=7 @@ -177,7 +179,8 @@ python engineV2.py --custom_device_vs_gpu=upload \ **在 XPU 上下载 GPU 的参考数据并进行精度对比** ```bash -python engineV2.py --custom_device_vs_gpu=download \ +python engineV2.py --custom_device_vs_gpu=True \ + --custom_device_vs_gpu_mode=download \ --random_seed=1210 \ --api_config_file="./test1.txt" \ --gpu_ids=7 diff --git a/engineV2.py b/engineV2.py index bb7b8a74..8e901239 100644 --- a/engineV2.py +++ b/engineV2.py @@ -479,16 +479,13 @@ def run_test_case(api_config_str, options): "paddle_torch_gpu_performance": APITestPaddleTorchGPUPerformance, "accuracy_stable": APITestAccuracyStable, "paddle_custom_device": APITestCustomDeviceVSCPU, + "custom_device_vs_gpu": APITestPaddleDeviceVSGPU, } - # 处理 custom_device_vs_gpu 模式 - if options.custom_device_vs_gpu: - test_class = APITestPaddleDeviceVSGPU - else: - test_class = next( - (cls for opt, cls in option_to_class.items() if getattr(options, opt, False)), - APITestAccuracy, # default fallback - ) + test_class = next( + (cls for opt, cls in option_to_class.items() if getattr(options, opt, False)), + APITestAccuracy, # default fallback + ) kwargs = {k: v for k, v in vars(options).items() if k in VALID_TEST_ARGS} case = test_class(api_config, **kwargs) try: @@ -666,10 +663,16 @@ def main(): ) parser.add_argument( "--custom_device_vs_gpu", + type=parse_bool, + default=False, + help="test paddle api on custom device vs GPU", + ) + parser.add_argument( + "--custom_device_vs_gpu_mode", type=str, choices=["upload", "download"], - default=None, - help="test paddle api on custom device vs GPU: 'upload' or 'download'", + default="upload", + help="operation mode for custom_device_vs_gpu: 'upload' or 'download'", ) parser.add_argument( "--bitwise_alignment", @@ -692,7 +695,7 @@ def main(): options.paddle_torch_gpu_performance, options.accuracy_stable, options.paddle_custom_device, - options.custom_device_vs_gpu is not None, + options.custom_device_vs_gpu, ] if len([m for m in mode if m is True]) != 1: print( @@ -705,7 +708,7 @@ def main(): "--paddle_torch_gpu_performance" "--accuracy_stable" "--paddle_custom_device" - "--custom_device_vs_gpu=upload or --custom_device_vs_gpu=download", + "--custom_device_vs_gpu", flush=True, ) return @@ -735,7 +738,7 @@ def main(): return # 将配置添加到 options 中,以便传递给测试类 - options.operation_mode = options.custom_device_vs_gpu + options.operation_mode = options.custom_device_vs_gpu_mode options.bos_path = bos_config_data["bos_path"] options.bos_conf_path = bos_config_data["bos_conf_path"] options.bcecmd_path = bos_config_data["bcecmd_path"] @@ -762,6 +765,8 @@ def main(): APITestAccuracy, APITestAccuracyStable, APITestCINNVSDygraph, + APITestCustomDeviceVSCPU, + APITestPaddleDeviceVSGPU, APITestPaddleGPUPerformance, APITestPaddleOnly, APITestPaddleTorchGPUPerformance, @@ -788,31 +793,26 @@ def main(): "paddle_torch_gpu_performance": APITestPaddleTorchGPUPerformance, "accuracy_stable": APITestAccuracyStable, "paddle_custom_device": APITestCustomDeviceVSCPU, + "custom_device_vs_gpu": APITestPaddleDeviceVSGPU, } - # 处理 custom_device_vs_gpu 模式 - if options.custom_device_vs_gpu: - from tester import APITestPaddleDeviceVSGPU - - test_class = APITestPaddleDeviceVSGPU - else: - test_class = next( - (cls for opt, cls in option_to_class.items() if getattr(options, opt, False)), - APITestAccuracy, # default fallback - ) + test_class = next( + (cls for opt, cls in option_to_class.items() if getattr(options, opt, False)), + APITestAccuracy, # default fallback + ) if options.custom_device_vs_gpu: # custom_device_vs_gpu 模式需要传递额外参数 - kwargs = { - "operation_mode": options.operation_mode, - "bos_path": options.bos_path, - "bos_conf_path": options.bos_conf_path, - "bcecmd_path": options.bcecmd_path, - "random_seed": options.random_seed, - "atol": options.atol, - "rtol": options.rtol, - } - case = test_class(api_config, **kwargs) + case = test_class( + api_config, + operation_mode=options.operation_mode, + bos_path=options.bos_path, + bos_conf_path=options.bos_conf_path, + bcecmd_path=options.bcecmd_path, + random_seed=options.random_seed, + atol=options.atol, + rtol=options.rtol, + ) elif options.accuracy: case = test_class( api_config,