Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 68 additions & 7 deletions engineV2.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
APITestPaddleTorchGPUPerformance,
APITestAccuracyStable,
APITestCustomDeviceVSCPU,
APITestPaddleDeviceVSGPU,
)
import torch
import paddle
Expand All @@ -38,7 +39,19 @@
# Process-wide environment flags, set at import time.
# NOTE(review): FLAGS_use_system_allocator looks like a Paddle runtime flag and
# NVIDIA_TF32_OVERRIDE=0 presumably disables TF32 math so results are compared
# at full FP32 precision — confirm against the framework/driver documentation.
os.environ["FLAGS_use_system_allocator"] = "1"
os.environ["NVIDIA_TF32_OVERRIDE"] = "0"

# Names accepted as test arguments. The entries after "test_tol" mirror the
# command-line options of the same name registered on the argument parser
# (operation_mode, bos_path, bos_conf_path, bcecmd_path, target_device_type)
# plus the random seed. This is the single definition: the former one-line
# five-element set was immediately overwritten by this one, so it is dropped.
VALID_TEST_ARGS = {
    "test_amp",
    "test_backward",
    "atol",
    "rtol",
    "test_tol",
    "operation_mode",
    "bos_path",
    "target_device_type",
    "random_seed",
    "bos_conf_path",
    "bcecmd_path",
}

# Cache for detect_device_type(): the detected device name and whether
# detection has already run.
DEVICE_TYPE = None
DEVICE_TYPE_DETECTED = False
Expand Down Expand Up @@ -122,7 +135,7 @@ def detect_device_type() -> str:
try:
out = subprocess.check_output(["ixsmi"], text=True, stderr=subprocess.STDOUT)
if any(re.match(r"^\|\s*\d+\s+Iluvatar", line) for line in out.splitlines()):
DEVICE_TYPE = "iluvatar"
DEVICE_TYPE = "iluvatar_gpu"
DEVICE_TYPE_DETECTED = True
return DEVICE_TYPE
except Exception:
Expand Down Expand Up @@ -162,7 +175,7 @@ def get_device_count() -> int:
DEVICE_COUNT = len(ids)
return DEVICE_COUNT

if device_type == "iluvatar":
if device_type == "iluvatar_gpu":
out = subprocess.check_output(["ixsmi"], text=True, stderr=subprocess.STDOUT)
ids = set()
for line in out.splitlines():
Expand Down Expand Up @@ -201,7 +214,7 @@ def _refresh_snapshot(device_type):
snapshot[dev_id] = (total_mib / 1024.0, used_mib / 1024.0)
break

elif device_type == "iluvatar":
elif device_type == "iluvatar_gpu":
out = subprocess.check_output(["ixsmi"], text=True, stderr=subprocess.STDOUT)
lines = out.splitlines()
for i, line in enumerate(lines):
Expand Down Expand Up @@ -238,7 +251,7 @@ def get_memory_info(gpu_id):
finally:
pynvml.nvmlShutdown()

if device_type in ("xpu", "iluvatar"):
if device_type in ("xpu", "iluvatar_gpu"):
_refresh_snapshot(device_type)
if _MEM_SNAPSHOT is None or gpu_id not in _MEM_SNAPSHOT:
raise RuntimeError(f"Failed to get memory info for {device_type} device {gpu_id}")
Expand Down Expand Up @@ -384,7 +397,8 @@ def pid_exists(pid):
APITestPaddleOnly,
APITestPaddleTorchGPUPerformance,
APITestTorchGPUPerformance,
APITestCustomDeviceVSCPU)
APITestCustomDeviceVSCPU,
APITestPaddleDeviceVSGPU)

test_classes = {
"APIConfig": APIConfig,
Expand All @@ -395,7 +409,8 @@ def pid_exists(pid):
"APITestTorchGPUPerformance": APITestTorchGPUPerformance,
"APITestPaddleTorchGPUPerformance": APITestPaddleTorchGPUPerformance,
"APITestAccuracyStable": APITestAccuracyStable,
"APITestCustomDeviceVSCPU": APITestCustomDeviceVSCPU
"APITestCustomDeviceVSCPU": APITestCustomDeviceVSCPU,
"APITestPaddleDeviceVSGPU": APITestPaddleDeviceVSGPU
}
globals().update(test_classes)

Expand Down Expand Up @@ -466,6 +481,7 @@ def run_test_case(api_config_str, options):
"paddle_torch_gpu_performance": APITestPaddleTorchGPUPerformance,
"accuracy_stable": APITestAccuracyStable,
"paddle_custom_device": APITestCustomDeviceVSCPU,
"custom_device_vs_gpu": APITestPaddleDeviceVSGPU,
}
test_class = next(
(cls for opt, cls in option_to_class.items() if getattr(options, opt, False)),
Expand Down Expand Up @@ -646,6 +662,42 @@ def main():
default=0,
help="The numpy random seed ",
)
parser.add_argument(
"--custom_device_vs_gpu",
type=parse_bool,
default=False,
help="test paddle api on custom device vs GPU",
)
parser.add_argument(
"--operation_mode",
type=str,
choices=["upload", "download"],
help="Operation mode: upload or download",
)
parser.add_argument(
"--bos_path",
type=str,
default="",
help="BOS storage path (required when operation_mode is specified)",
)
parser.add_argument(
"--bos_conf_path",
type=str,
default="./conf",
help="Path for bcecmd --conf-path when using BOS",
)
parser.add_argument(
"--bcecmd_path",
type=str,
default="./bcecmd",
help="bcecmd binary path used for BOS upload/download",
)
parser.add_argument(
"--target_device_type",
type=str,
choices=["gpu", "xpu", "iluvatar_gpu"],
help="Target device type for download mode",
)
parser.add_argument(
"--bitwise_alignment",
type=bool,
Expand All @@ -667,6 +719,7 @@ def main():
options.paddle_torch_gpu_performance,
options.accuracy_stable,
options.paddle_custom_device,
options.custom_device_vs_gpu,
]
if len([m for m in mode if m is True]) != 1:
print(
Expand All @@ -679,10 +732,18 @@ def main():
"--paddle_torch_gpu_performance"
"--accuracy_stable"
"--paddle_custom_device"
"--custom_device_vs_gpu"
" to True.",
flush=True,
)
return
if options.custom_device_vs_gpu:
if options.operation_mode and not options.bos_path:
print("--bos_path is required when --operation_mode is specified", flush=True)
return
if options.operation_mode == "download" and not options.target_device_type:
print("--target_device_type is required in download mode", flush=True)
return
if options.test_tol and not options.accuracy:
print(f"--test_tol takes effect when --accuracy is True.", flush=True)
if options.test_backward and not options.paddle_cinn:
Expand Down
5 changes: 5 additions & 0 deletions tester/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
'APITestPaddleTorchGPUPerformance',
'APITestAccuracyStable',
'APITestCustomDeviceVSCPU',
'APITestPaddleDeviceVSGPU',
'paddle_to_torch',
'TensorConfig',
'APIConfig',
Expand All @@ -32,6 +33,7 @@
from .paddle_cinn_vs_dygraph import APITestCINNVSDygraph
from .accuracy_stable import APITestAccuracyStable
from .paddle_device_vs_cpu import APITestCustomDeviceVSCPU
from .paddle_device_vs_gpu import APITestPaddleDeviceVSGPU
from . import paddle_to_torch
from .api_config import (
TensorConfig,
Expand Down Expand Up @@ -74,6 +76,9 @@ def __getattr__(name: str) -> Any:
elif name == 'APITestCustomDeviceVSCPU':
from .paddle_device_vs_cpu import APITestCustomDeviceVSCPU
return APITestCustomDeviceVSCPU
elif name == 'APITestPaddleDeviceVSGPU':
from .paddle_device_vs_gpu import APITestPaddleDeviceVSGPU
return APITestPaddleDeviceVSGPU
elif name == 'paddle_to_torch':
from . import paddle_to_torch
return paddle_to_torch
Expand Down
Loading