|
| 1 | +import time |
| 2 | + |
| 3 | +import pytest |
| 4 | + |
| 5 | +from test import test_utils |
| 6 | +from test.test_utils import ec2 as ec2_utils |
| 7 | +from test.test_utils import LOGGER |
| 8 | +from packaging.version import Version |
| 9 | +from packaging.specifiers import SpecifierSet |
| 10 | + |
| 11 | + |
@pytest.mark.usefixtures("sagemaker")
@pytest.mark.model("N/A")
@pytest.mark.processor("gpu")
@pytest.mark.parametrize("ec2_instance_type", ["g5.8xlarge"], indirect=True)
@pytest.mark.timeout(1200)
@pytest.mark.skipif(
    not test_utils.is_pr_context(),
    reason="Only run nvjpeg test in PR context to avoid block MAINLINE",
)
def test_nvjpeg_gpu_x86(gpu, ec2_connection, ec2_instance, x86_compatible_only, below_cuda129_only):
    """
    Verify the nvJPEG CUDA sample builds and runs inside the GPU DLC image.

    Runs only in PR context (skipped otherwise) on a g5.8xlarge instance,
    with a 20-minute timeout for the clone + build + run cycle.

    Fixtures:
        gpu: image URI of the GPU DLC under test (forwarded to the runner).
        ec2_connection: remote connection used to execute docker commands.
        ec2_instance: ensures the EC2 instance exists; not referenced directly.
        x86_compatible_only: presumably restricts collection to x86 images —
            confirm against the fixture definition.
        below_cuda129_only: presumably restricts to images with CUDA < 12.9 —
            confirm against the fixture definition.
    """
    _run_nvjpeg_test(gpu, ec2_connection)
| 23 | + |
| 24 | + |
def _run_nvjpeg_test(image_uri, ec2_connection):
    """
    Build and run the NVIDIA nvJPEG CUDA sample inside a container started
    from ``image_uri`` on the remote host behind ``ec2_connection``.

    Pulls the image from ECR, starts a detached GPU container, then clones,
    builds, and executes the nvJPEG sample from NVIDIA's cuda-samples repo.
    The branch and build system used depend on the image's CUDA version
    (see ``_build_nvjpeg_test_command``).

    :param image_uri: ECR URI of the GPU DLC image to test.
    :param ec2_connection: remote connection used to run docker commands.
    :return: stdout of the nvJPEG sample run, with surrounding newlines stripped.
    """
    LOGGER.info(f"starting _run_nvjpeg_test with {image_uri}")

    account_id = test_utils.get_account_id_from_image_uri(image_uri)
    image_region = test_utils.get_region_from_image_uri(image_uri)
    repo_name, image_tag = test_utils.get_repository_and_tag_from_image_uri(image_uri)
    cuda_version = test_utils.get_cuda_version_from_tag(image_uri)

    container_name = f"{repo_name}-test-nvjpeg"

    LOGGER.info(f"_run_nvjpeg_test pulling: {image_uri}")
    test_utils.login_to_ecr_registry(ec2_connection, account_id, image_region)

    ec2_connection.run(f"docker pull {image_uri}", hide="out")

    LOGGER.info(f"_run_nvjpeg_test running: {image_uri}")
    # NOTE(review): the container is left running after the test; assumed the
    # surrounding harness tears down the instance/containers — confirm.
    ec2_connection.run(
        f"docker run --runtime=nvidia --gpus all --name {container_name} -id {image_uri}"
    )

    # Drop only the leading "cu" marker (e.g. "cu124" -> "124").
    # str.strip("cu") would also eat any trailing 'c'/'u' characters, which
    # is not the intent.
    cuda_version_numeric = (
        cuda_version[2:] if cuda_version.startswith("cu") else cuda_version
    )
    test_command = _build_nvjpeg_test_command(cuda_version_numeric)

    output = ec2_connection.run(
        f"docker exec {container_name} /bin/bash -c '{test_command}'"
    ).stdout.strip("\n")

    return output


def _build_nvjpeg_test_command(cuda_version_numeric):
    """
    Return the shell command that clones, builds, and runs the nvJPEG sample
    for the given CUDA version.

    * CUDA < 12.6: Makefile-based build on the matching cuda-samples release
      tag (12.4 images use the ``v12.4.1`` tag, which has a different branch
      tag in cuda-samples).
    * CUDA >= 12.6: CMake-based build pinned to the ``v12.8`` branch, because
      the 12.9/master samples have a compute_100 architecture support issue
      (https://github.com/NVIDIA/cuda-samples/issues/367).

    :param cuda_version_numeric: CUDA version digits without the "cu" prefix,
        e.g. "124" for CUDA 12.4.
    :return: a single ``&&``-chained shell command string.
    """
    if Version(cuda_version_numeric) < Version("126"):
        # 12.4.1 has a different branch tag in cuda-samples
        if Version(cuda_version_numeric) == Version("124"):
            git_branch_tag = "12.4.1"
        else:
            # e.g. "121" -> "12.1"
            git_branch_tag = f"{cuda_version_numeric[:-1]}.{cuda_version_numeric[-1]}"
        return (
            f"git clone -b v{git_branch_tag} https://github.com/NVIDIA/cuda-samples.git && "
            "cd cuda-samples/Samples/4_CUDA_Libraries/nvJPEG && "
            "make -j$(nproc) && "
            "./nvJPEG"
        )
    # For CUDA 12.6 and above, use the v12.8 branch of cuda-samples as a
    # workaround: the 12.9/master samples do not support the compute_100
    # architecture. See https://github.com/NVIDIA/cuda-samples/issues/367
    return (
        "git clone -b v12.8 https://github.com/NVIDIA/cuda-samples.git && "
        "cd cuda-samples && "
        "mkdir build && cd build && "
        "cmake .. && "
        "cd Samples/4_CUDA_Libraries/nvJPEG && "
        "make -j$(nproc) && "
        "./nvJPEG"
    )
0 commit comments