diff --git a/engineV2-README.md b/engineV2-README.md index 5ce90f76..df2ab557 100644 --- a/engineV2-README.md +++ b/engineV2-README.md @@ -86,6 +86,8 @@ | `--timeout` | int | 单个测试用例执行超时秒数(默认 1800) | | `--show_runtime_status` | bool | 是否实时显示当前的测试进度(默认 True) | | `--random_seed` | int | numpy random的随机种子(默认为0,此时不会显式设置numpy random的seed) | +| `--custom_device_vs_gpu` | bool | 启用自定义设备与GPU的精度对比测试模式(默认 False) | +| `--custom_device_vs_gpu_mode` | str | 自定义设备与GPU对比的模式:`upload` 或 `download`(默认 `upload`) | | `--bitwise_alignment` | bool | 是否进行诸位对齐对比,开启后所有的api的精度对比都按照atol=0.0,rtol = 0.0的精度对比结果| @@ -123,6 +125,67 @@ python engineV2.py --accuracy=True --api_config_file="tester/api_config/api_conf ``` 该脚本使用参数:`NUM_GPUS=-1, NUM_WORKERS_PER_GPU=-1, GPU_IDS="4,5,6,7"`,在后台运行程序,可在修改 `run.sh` 参数后使用 +### 自定义设备与 GPU 精度对比测试 + +#### 功能说明 + +`APITestPaddleDeviceVSGPU` 类支持跨设备的精度对比测试,目前主要面向 **GPU 上传 + XPU(或其他设备)下载对比** 这一典型场景。该功能分为两个模式: + +- **Upload 模式(GPU 侧)**:在 GPU 上执行测试,保存结果到本地,然后上传到 BOS 云存储 +- **Download 模式(XPU/其他设备侧)**:在 XPU 或其他设备上执行测试,从 BOS 下载 GPU 侧的参考数据进行精度对比 + +#### 工作流程 + +1. **Upload 模式工作流(GPU 侧)**: + - 在 GPU 设备上执行 Paddle API 测试 + - 保存 Forward 输出和 Backward 梯度到本地 PDTensor 文件 + - 文件名依赖随机种子与配置哈希(如 `1210-xxx.pdtensor`) + - 使用 bcecmd 工具将文件上传到 BOS 云存储 + +2. 
**Download 模式工作流(XPU/其他设备侧)**: + - 在 XPU 或其他设备上执行相同的 Paddle API 测试 + - 使用与 GPU 侧上传时一致的随机种子和配置,构造同名 PDTensor 文件名 + - 从 BOS 云存储下载对应的 GPU 参考数据 + - 对比 Forward 输出和 Backward 梯度,验证与 GPU 的精度一致性 + +#### 配置文件设置 + +首先,编辑 `tester/bos_config.yaml` 配置文件: + +```yaml +# BOS 配置文件 +# 用于自定义设备与 GPU 精度对比测试的云存储配置 + +# BOS 存储路径(如:xly-devops/liujingzong/) +bos_path: "xly-devops/liujingzong/" + +# BOS 配置文件路径(bcecmd 使用的配置文件路径) +bos_conf_path: "./conf" + +# bcecmd 命令行工具路径 +bcecmd_path: "./bcecmd" +``` + +#### 命令示例 +**在 GPU 上执行测试并上传结果** +```bash +# 在 GPU 设备上执行,生成1210-xxx.pdtensor 文件并上传到 BOS +python engineV2.py --custom_device_vs_gpu=True \ + --custom_device_vs_gpu_mode=upload \ + --random_seed=1210 \ + --api_config_file="./test1.txt" \ + --gpu_ids=7 +``` + +**在 XPU 上下载 GPU 的参考数据并进行精度对比** +```bash +python engineV2.py --custom_device_vs_gpu=True \ + --custom_device_vs_gpu_mode=download \ + --random_seed=1210 \ + --api_config_file="./test1.txt" \ + --gpu_ids=7 +``` + ## 监控方法 执行 `run.sh` 后可通过以下方式监控: diff --git a/engineV2.py b/engineV2.py index 96c14e6a..8e901239 100644 --- a/engineV2.py +++ b/engineV2.py @@ -13,10 +13,12 @@ from concurrent.futures import TimeoutError, as_completed from datetime import datetime from multiprocessing import Lock, Manager, cpu_count, set_start_method +from pathlib import Path from typing import TYPE_CHECKING import numpy as np import pynvml +import yaml from pebble import ProcessExpired, ProcessPool if TYPE_CHECKING: @@ -28,6 +30,7 @@ APITestAccuracyStable, APITestCINNVSDygraph, APITestCustomDeviceVSCPU, + APITestPaddleDeviceVSGPU, APITestPaddleGPUPerformance, APITestPaddleOnly, APITestPaddleTorchGPUPerformance, @@ -39,7 +42,18 @@ os.environ["FLAGS_USE_SYSTEM_ALLOCATOR"] = "1" os.environ["NVIDIA_TF32_OVERRIDE"] = "0" -VALID_TEST_ARGS = {"test_amp", "test_backward", "atol", "rtol", "test_tol"} +VALID_TEST_ARGS = { + "test_amp", + "test_backward", + "atol", + "rtol", + "test_tol", + "operation_mode", + "bos_path", + "random_seed", + "bos_conf_path", + "bcecmd_path", +} 
DEVICE_TYPE = None DEVICE_TYPE_DETECTED = False @@ -123,7 +137,7 @@ def detect_device_type() -> str: try: out = subprocess.check_output(["ixsmi"], text=True, stderr=subprocess.STDOUT) if any(re.match(r"^\|\s*\d+\s+Iluvatar", line) for line in out.splitlines()): - DEVICE_TYPE = "iluvatar" + DEVICE_TYPE = "iluvatar_gpu" DEVICE_TYPE_DETECTED = True return DEVICE_TYPE except Exception: @@ -164,7 +178,7 @@ def get_device_count() -> int: DEVICE_COUNT = len(ids) return DEVICE_COUNT - if device_type == "iluvatar": + if device_type == "iluvatar_gpu": out = subprocess.check_output(["ixsmi"], text=True, stderr=subprocess.STDOUT) ids = set() for line in out.splitlines(): @@ -202,7 +216,7 @@ def _refresh_snapshot(device_type): snapshot[dev_id] = (total_mib / 1024.0, used_mib / 1024.0) break - elif device_type == "iluvatar": + elif device_type == "iluvatar_gpu": out = subprocess.check_output(["ixsmi"], text=True, stderr=subprocess.STDOUT) lines = out.splitlines() for i, line in enumerate(lines): @@ -240,7 +254,7 @@ def get_memory_info(gpu_id): finally: pynvml.nvmlShutdown() - if device_type in ("xpu", "iluvatar"): + if device_type in ("xpu", "iluvatar_gpu"): _refresh_snapshot(device_type) if _MEM_SNAPSHOT is None or gpu_id not in _MEM_SNAPSHOT: raise RuntimeError(f"Failed to get memory info for {device_type} device {gpu_id}") @@ -379,6 +393,7 @@ def pid_exists(pid): APITestAccuracyStable, APITestCINNVSDygraph, APITestCustomDeviceVSCPU, + APITestPaddleDeviceVSGPU, APITestPaddleGPUPerformance, APITestPaddleOnly, APITestPaddleTorchGPUPerformance, @@ -395,6 +410,7 @@ def pid_exists(pid): "APITestPaddleTorchGPUPerformance": APITestPaddleTorchGPUPerformance, "APITestAccuracyStable": APITestAccuracyStable, "APITestCustomDeviceVSCPU": APITestCustomDeviceVSCPU, + "APITestPaddleDeviceVSGPU": APITestPaddleDeviceVSGPU, } globals().update(test_classes) @@ -463,7 +479,9 @@ def run_test_case(api_config_str, options): "paddle_torch_gpu_performance": APITestPaddleTorchGPUPerformance, 
"accuracy_stable": APITestAccuracyStable, "paddle_custom_device": APITestCustomDeviceVSCPU, + "custom_device_vs_gpu": APITestPaddleDeviceVSGPU, } + test_class = next( (cls for opt, cls in option_to_class.items() if getattr(options, opt, False)), APITestAccuracy, # default fallback @@ -643,6 +661,19 @@ def main(): default=0, help="The numpy random seed ", ) + parser.add_argument( + "--custom_device_vs_gpu", + type=parse_bool, + default=False, + help="test paddle api on custom device vs GPU", + ) + parser.add_argument( + "--custom_device_vs_gpu_mode", + type=str, + choices=["upload", "download"], + default="upload", + help="operation mode for custom_device_vs_gpu: 'upload' or 'download'", + ) parser.add_argument( "--bitwise_alignment", type=bool, @@ -664,6 +695,7 @@ def main(): options.paddle_torch_gpu_performance, options.accuracy_stable, options.paddle_custom_device, + options.custom_device_vs_gpu, ] if len([m for m in mode if m is True]) != 1: print( @@ -676,10 +708,45 @@ def main(): "--paddle_torch_gpu_performance" "--accuracy_stable" "--paddle_custom_device" - " to True.", + "--custom_device_vs_gpu", flush=True, ) return + + # 处理 custom_device_vs_gpu 模式的配置 + bos_config_data = None + if options.custom_device_vs_gpu: + # 读取 BOS 配置文件(固定路径:tester/bos_config.yaml) + bos_config_path = Path("tester/bos_config.yaml") + if not bos_config_path.exists(): + print(f"BOS config file not found: {bos_config_path}", flush=True) + return + + try: + with open(bos_config_path, encoding="utf-8") as f: + bos_config_data = yaml.safe_load(f) + + if not bos_config_data: + print(f"BOS config file is empty: {bos_config_path}", flush=True) + return + + # 验证必需的配置项 + required_keys = ["bos_path", "bos_conf_path", "bcecmd_path"] + missing_keys = [key for key in required_keys if key not in bos_config_data] + if missing_keys: + print(f"Missing required keys in BOS config: {missing_keys}", flush=True) + return + + # 将配置添加到 options 中,以便传递给测试类 + options.operation_mode = 
options.custom_device_vs_gpu_mode + options.bos_path = bos_config_data["bos_path"] + options.bos_conf_path = bos_config_data["bos_conf_path"] + options.bcecmd_path = bos_config_data["bcecmd_path"] + + except Exception as e: + print(f"Failed to load BOS config file {bos_config_path}: {e}", flush=True) + return + if options.test_tol and not options.accuracy: print("--test_tol takes effect when --accuracy is True.", flush=True) if options.test_backward and not options.paddle_cinn: @@ -698,6 +765,8 @@ def main(): APITestAccuracy, APITestAccuracyStable, APITestCINNVSDygraph, + APITestCustomDeviceVSCPU, + APITestPaddleDeviceVSGPU, APITestPaddleGPUPerformance, APITestPaddleOnly, APITestPaddleTorchGPUPerformance, @@ -724,13 +793,27 @@ def main(): "paddle_torch_gpu_performance": APITestPaddleTorchGPUPerformance, "accuracy_stable": APITestAccuracyStable, "paddle_custom_device": APITestCustomDeviceVSCPU, + "custom_device_vs_gpu": APITestPaddleDeviceVSGPU, } + test_class = next( (cls for opt, cls in option_to_class.items() if getattr(options, opt, False)), APITestAccuracy, # default fallback ) - if options.accuracy: + if options.custom_device_vs_gpu: + # custom_device_vs_gpu 模式需要传递额外参数 + case = test_class( + api_config, + operation_mode=options.operation_mode, + bos_path=options.bos_path, + bos_conf_path=options.bos_conf_path, + bcecmd_path=options.bcecmd_path, + random_seed=options.random_seed, + atol=options.atol, + rtol=options.rtol, + ) + elif options.accuracy: case = test_class( api_config, test_amp=options.test_amp, diff --git a/tester/__init__.py b/tester/__init__.py index 73e0969e..e721c402 100644 --- a/tester/__init__.py +++ b/tester/__init__.py @@ -10,6 +10,7 @@ "APITestBase", "APITestCINNVSDygraph", "APITestCustomDeviceVSCPU", + "APITestPaddleDeviceVSGPU", "APITestPaddleGPUPerformance", "APITestPaddleOnly", "APITestPaddleTorchGPUPerformance", @@ -38,6 +39,7 @@ from .base import APITestBase from .paddle_cinn_vs_dygraph import APITestCINNVSDygraph from 
.paddle_device_vs_cpu import APITestCustomDeviceVSCPU + from .paddle_device_vs_gpu import APITestPaddleDeviceVSGPU from .paddle_gpu_performance import APITestPaddleGPUPerformance from .paddle_only import APITestPaddleOnly from .paddle_torch_gpu_performance import APITestPaddleTorchGPUPerformance @@ -84,6 +86,10 @@ def __getattr__(name: str) -> Any: from .paddle_device_vs_cpu import APITestCustomDeviceVSCPU return APITestCustomDeviceVSCPU + elif name == "APITestPaddleDeviceVSGPU": + from .paddle_device_vs_gpu import APITestPaddleDeviceVSGPU + + return APITestPaddleDeviceVSGPU elif name == "paddle_to_torch": from . import paddle_to_torch diff --git a/tester/bos_config.yaml b/tester/bos_config.yaml new file mode 100644 index 00000000..a981d9c9 --- /dev/null +++ b/tester/bos_config.yaml @@ -0,0 +1,12 @@ +# BOS 配置文件 +# 用于自定义设备与 GPU 精度对比测试的云存储配置 + +# BOS 存储路径(如:xly-devops/liujingzong/) +bos_path: "xly-devops/liujingzong/" + +# BOS 配置文件路径(bcecmd 使用的配置文件路径) +bos_conf_path: "./conf" + +# bcecmd 命令行工具路径 +bcecmd_path: "./bcecmd" + diff --git a/tester/paddle_device_vs_gpu.py b/tester/paddle_device_vs_gpu.py new file mode 100644 index 00000000..3694e163 --- /dev/null +++ b/tester/paddle_device_vs_gpu.py @@ -0,0 +1,401 @@ +from __future__ import annotations + +import hashlib +import json +import subprocess +import tempfile +from pathlib import Path + +import numpy as np +import paddle + +from .api_config.log_writer import write_to_log +from .paddle_device_vs_cpu import APITestCustomDeviceVSCPU + + +class APITestPaddleDeviceVSGPU(APITestCustomDeviceVSCPU): + def __init__(self, api_config, **kwargs): + # 继承 CustomDevice vs CPU 的基本功能 + super().__init__(api_config, **kwargs) + + # 新增参数 + self.operation_mode = kwargs.get("operation_mode", None) + self.bos_path = kwargs.get("bos_path", "") + self.random_seed = kwargs.get("random_seed", 0) + self.atol = kwargs.get("atol", 1e-2) + self.rtol = kwargs.get("rtol", 1e-2) + self.bcecmd_path = Path(kwargs.get("bcecmd_path", 
"./bcecmd")).resolve()
+        self.bos_conf_path = kwargs.get("bos_conf_path", "./conf")
+
+        # 设置随机种子确保一致性
+        if self.random_seed != 0:
+            np.random.seed(self.random_seed)
+            paddle.seed(self.random_seed)
+
+    def _get_config_hash(self):
+        """生成API配置的哈希值,用于文件名"""
+        config_str = json.dumps(
+            {
+                "api_name": self.api_config.api_name,
+                "args": [str(arg) for arg in self.api_config.args],
+                "kwargs": {k: str(v) for k, v in self.api_config.kwargs.items()},
+            },
+            sort_keys=True,
+        )
+        return hashlib.md5(config_str.encode()).hexdigest()[:16]
+
+    def _get_local_device_type(self):
+        """获取当前设备的类型,优先复用 engineV2 的检测逻辑。"""
+        from engineV2 import detect_device_type
+
+        return detect_device_type()
+
+    def _get_filename(self):
+        """生成PDTensor文件名(不再包含设备前缀,只依赖随机种子和配置哈希)"""
+        return f"{self.random_seed}-{self._get_config_hash()}.pdtensor"
+
+    def _save_tensor_locally(self, output, grads=None):
+        """保存结果到本地PDTensor文件"""
+        # 保存到临时文件
+        temp_dir = tempfile.gettempdir()
+        filename = self._get_filename()
+        local_path = Path(temp_dir) / filename
+
+        # 使用paddle.save保存张量数据
+        save_data = {"output": output}
+        if grads is not None:
+            save_data["grads"] = grads
+
+        paddle.save(save_data, str(local_path))
+        print(f"[upload] Saved pdtensor file: {local_path}", flush=True)
+        return local_path
+
+    def _build_bos_path(self, filename: str) -> str:
+        cleaned = self.bos_path.strip().lstrip("/").rstrip("/")
+        return f"bos:/{cleaned}/{filename}"
+
+    def _bcecmd_cp(self, src: str, dst: str, action: str):
+        """使用指定的 bcecmd 命令执行 cp 操作"""
+        cmd = [
+            str(self.bcecmd_path),
+            "--conf-path",
+            self.bos_conf_path,
+            "bos",
+            "cp",
+            src,
+            dst,
+        ]
+        print(f"[{action}] Running command: {' '.join(cmd)}", flush=True)
+        return subprocess.run(cmd, capture_output=True, text=True, timeout=300)
+
+    def _upload_to_bos(self, local_path):
+        """使用 bcecmd 上传文件到 BOS"""
+        if not self.bos_path:
+            print(f"[upload] No bos_path specified, skip upload", flush=True)
+            return
+
+        remote_path = self._build_bos_path(local_path.name)
+        try:
+            
result = self._bcecmd_cp(str(local_path), remote_path, "upload") + if result.returncode == 0: + print(f"[upload] Upload succeeded: {remote_path}", flush=True) + local_path.unlink(missing_ok=True) + else: + print( + f"[upload] Upload failed: {remote_path}, stderr: {result.stderr}", + flush=True, + ) + except Exception as e: + print(f"[upload] Upload failed: {e}", flush=True) + + def _download_from_bos(self, filename): + """使用 bcecmd 从 BOS 下载文件""" + if not self.bos_path: + print(f"[download] No bos_path specified, skip download", flush=True) + return None + + temp_dir = tempfile.gettempdir() + local_path = Path(temp_dir) / filename + + if local_path.exists(): + print(f"[download] File already exists locally: {local_path}", flush=True) + return local_path + + remote_path = self._build_bos_path(filename) + try: + result = self._bcecmd_cp(remote_path, str(local_path), "download") + if result.returncode == 0: + print(f"[download] Download succeeded: {local_path}", flush=True) + return local_path + else: + print( + f"[download] Download failed: {remote_path}, stderr: {result.stderr}", + flush=True, + ) + return None + except Exception as e: + print(f"[download] Download failed: {e}", flush=True) + return None + + def _run_paddle(self, device_type: str): + """在指定设备上运行 Paddle(统一 GPU / XPU / 自定义设备逻辑)。""" + try: + paddle_device_type = device_type + if device_type == "gpu": + # engineV2.py sets CUDA_VISIBLE_DEVICES, so paddle will use the correct GPU. 
+ paddle.set_device("gpu") + elif device_type == "xpu": + paddle.set_device(f"xpu:{self.xpu_device_id}") + elif device_type == self.custom_device_type and self.check_custom_device_available(): + paddle.set_device(f"{self.custom_device_type}:{self.custom_device_id}") + elif device_type == "cpu": + paddle.set_device("cpu") + else: + print(f"[error] No custom device available", flush=True) + return None, None + + if not self.ana_paddle_api_info(): + print("ana_paddle_api_info failed", flush=True) + return None, None + + if not self.gen_numpy_input(): + print("gen_numpy_input failed", flush=True) + return None, None + + if not self.gen_paddle_input(): + print("gen_paddle_input failed", flush=True) + return None, None + + paddle_output = self.paddle_api(*tuple(self.paddle_args), **self.paddle_kwargs) + + paddle_grads = None + if self.need_check_grad(): + inputs_list = self.get_paddle_input_list() + result_outputs, result_outputs_grads = self.gen_paddle_output_and_output_grad( + paddle_output + ) + if inputs_list and result_outputs and result_outputs_grads: + paddle_grads = paddle.grad( + outputs=result_outputs, + inputs=inputs_list, + grad_outputs=result_outputs_grads, + allow_unused=True, + ) + + return paddle_output, paddle_grads + + except Exception as e: + print( + f"[paddle {paddle_device_type} error] {self.api_config.config}: {e}", + flush=True, + ) + write_to_log("paddle_error", self.api_config.config) + return None, None + + def _compare_with_downloaded(self, local_output, local_grads, downloaded_tensor): + """与下载的结果进行对比""" + try: + print(f"[compare] Comparing results for {self.api_config.config}", flush=True) + + # 加载下载的数据 + remote_data = paddle.load(str(downloaded_tensor)) + remote_output = remote_data["output"] + + # 对比Forward输出(直接使用Paddle对比) + try: + if isinstance(local_output, paddle.Tensor) and isinstance( + remote_output, paddle.Tensor + ): + # 使用Paddle的对比方法 + np.testing.assert_allclose( + local_output.numpy(), + remote_output.numpy(), + atol=self.atol, + 
rtol=self.rtol, + equal_nan=True, + ) + elif isinstance(local_output, (list, tuple)) and isinstance( + remote_output, (list, tuple) + ): + # 列表或元组对比 + for i, (local_item, remote_item) in enumerate(zip(local_output, remote_output)): + if isinstance(local_item, paddle.Tensor) and isinstance( + remote_item, paddle.Tensor + ): + np.testing.assert_allclose( + local_item.numpy(), + remote_item.numpy(), + atol=self.atol, + rtol=self.rtol, + equal_nan=True, + ) + print( + f"[compare] Forward output[{i}] comparison passed", + flush=True, + ) + else: + # 其他情况,尝试转换为numpy对比 + local_np = ( + local_output.numpy() + if isinstance(local_output, paddle.Tensor) + else np.array(local_output) + ) + remote_np = ( + remote_output.numpy() + if isinstance(remote_output, paddle.Tensor) + else np.array(remote_output) + ) + np.testing.assert_allclose( + local_np, + remote_np, + atol=self.atol, + rtol=self.rtol, + equal_nan=True, + ) + + print( + f"[compare] Forward accuracy check passed for {self.api_config.config}", + flush=True, + ) + except Exception as e: + print( + f"[compare] Forward accuracy check failed for {self.api_config.config}, error: {e}", + flush=True, + ) + write_to_log("accuracy_error", self.api_config.config) + return False + + # 对比Backward梯度(如果存在且Forward通过) + if local_grads is not None and "grads" in remote_data: + remote_grads = remote_data["grads"] + + try: + if isinstance(local_grads, (list, tuple)) and isinstance( + remote_grads, (list, tuple) + ): + for i, (local_grad, remote_grad) in enumerate( + zip(local_grads, remote_grads) + ): + if isinstance(local_grad, paddle.Tensor) and isinstance( + remote_grad, paddle.Tensor + ): + np.testing.assert_allclose( + local_grad.numpy(), + remote_grad.numpy(), + atol=self.atol, + rtol=self.rtol, + equal_nan=True, + ) + print( + f"[compare] Backward gradient[{i}] comparison passed", + flush=True, + ) + elif isinstance(local_grads, paddle.Tensor) and isinstance( + remote_grads, paddle.Tensor + ): + np.testing.assert_allclose( + 
local_grads.numpy(),
+                            remote_grads.numpy(),
+                            atol=self.atol,
+                            rtol=self.rtol,
+                            equal_nan=True,
+                        )
+
+                    print(
+                        f"[compare] Backward gradient check passed for {self.api_config.config}",
+                        flush=True,
+                    )
+                except Exception as e:
+                    print(
+                        f"[compare] Backward gradient check failed for {self.api_config.config}, error: {e}",
+                        flush=True,
+                    )
+                    return False
+
+            print(
+                f"[compare] Accuracy check passed for {self.api_config.config}",
+                flush=True,
+            )
+            write_to_log("pass", self.api_config.config)
+            return True
+
+        except Exception as e:
+            print(
+                f"[compare] Comparison failed for {self.api_config.config}, error: {e}",
+                flush=True,
+            )
+            write_to_log("accuracy_error", self.api_config.config)
+            return False
+
+    def test(self):
+        """Main test function"""
+        if self.operation_mode == "upload":
+            self._test_upload_mode()
+        elif self.operation_mode == "download":
+            self._test_download_mode()
+        else:
+            print(
+                "[error] custom_device_vs_gpu_mode 不能为空,请指定 --custom_device_vs_gpu_mode=upload 或 download",
+                flush=True,
+            )
+            return
+
+    def _test_upload_mode(self):
+        """Upload模式:执行测试并上传结果"""
+        print(f"[upload] Starting upload mode for {self.api_config.config}", flush=True)
+
+        local_device_type = self._get_local_device_type()
+        output, grads = self._run_paddle(local_device_type)
+
+        if output is None:
+            print(f"[upload] Execution failed for {self.api_config.config}", flush=True)
+            return
+
+        # 保存结果到本地PDTensor
+        local_path = self._save_tensor_locally(output, grads)
+
+        # 异步上传到BOS
+        self._upload_to_bos(local_path)
+
+        print(f"[upload] Upload mode completed for {self.api_config.config}", flush=True)
+
+    def _test_download_mode(self):
+        """Download模式:下载对比数据并验证"""
+        print(
+            f"[download] Starting download mode for {self.api_config.config}",
+            flush=True,
+        )
+
+        # 确定要下载的文件名(与 GPU 上传时保持一致)
+        target_filename = self._get_filename()
+
+        # 下载文件
+        downloaded_file = self._download_from_bos(target_filename)
+        if downloaded_file is None:
+            print(
+                f"[download] Failed to download comparison data for 
{self.api_config.config}", + flush=True, + ) + return + + # 在本地设备上执行测试 + local_device_type = self._get_local_device_type() + local_output, local_grads = self._run_paddle(local_device_type) + + if local_output is None: + print( + f"[download] Local execution failed for {self.api_config.config}", + flush=True, + ) + return + + # 与下载的结果进行对比 + success = self._compare_with_downloaded(local_output, local_grads, downloaded_file) + + # 清理下载的文件 + downloaded_file.unlink(missing_ok=True) + + print( + f"[download] Download mode completed for {self.api_config.config}", + flush=True, + )