Skip to content

Commit fc1d89a

Browse files
Yadan-WeiYadan Wei
andauthored
Benchmark ARM64 PyTorch Inference (#4620)
* modify benchmark test * fix format * no build * comment out UL20_PT_NEURON_US_WEST_2 * test connection * fix connection error * use pkey * stash * test connection again * print version * fix username * run cpu tests too * run cpu tests add log * run cpu tests add log * add error catch * remove VGG13 model for cpu * remove VGG13 model for cpu * use a bigger c6g * remove c6g * stash * add more models and threshold * fix command * print log * add timeout * fix logger * distilbert * check distillbert key and decrease timeout * add strace and adjust bert model order * remove strace * remove strace * add instance type and adjust threshold * fix bug * add more instance and print instance in result * split model to cpu and gpu * use end time - start time * change unit and remove some heavy models * change units * build 2.6 and test benchmark * build PT2.5 x86 test * revert toml * revert buildspec * revert buildspec * change instance type --------- Co-authored-by: Yadan Wei <yadanwei@amazon.com>
1 parent 1d3d383 commit fc1d89a

File tree

6 files changed

+289
-96
lines changed

6 files changed

+289
-96
lines changed

src/benchmark_metrics.py

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -79,22 +79,37 @@
7979
# p99 latency, unit: millisecond
# Per-model p99 latency ceilings used to validate benchmark runs; a run fails
# when the measured p99 latency exceeds the model's entry.  Keys of the outer
# dict are framework-version specifiers; keys of the inner dict are model names
# as reported by the benchmark script (suffix _128/_256 = input sequence length).
PYTORCH_INFERENCE_CPU_THRESHOLD = {
    ">=1.0": {
        "ResNet18": 80.0,
        "MobileNet_V2": 60.0,
        "GoogLeNet": 120.0,
        "DenseNet121": 200.0,
        "Inception_V3": 250.0,
        "DistilBert_128": 200.0,
        "Bert_128": 300.0,
        "Roberta_128": 300.0,
        "ASR": 300.0,
        "All-MPNet_128": 300.0,
    }
}
# GPU thresholds cover a larger model set (heavier models are feasible on GPU).
PYTORCH_INFERENCE_GPU_THRESHOLD = {
    ">=1.0": {
        "ResNet18": 7.5,
        "VGG13": 4.0,
        "MobileNet_V2": 13.0,
        "GoogLeNet": 18.0,
        "DenseNet121": 40.0,
        "Inception_V3": 30.0,
        "ResNet50": 15.0,
        "ViT_B_16": 20.0,
        "DistilBert_128": 10.0,
        "DistilBert_256": 11.0,
        "Bert_128": 20.0,
        "Bert_256": 20.0,
        "Roberta_128": 20.0,
        "Roberta_256": 20.0,
        "ASR": 20.0,
        "All-MPNet_128": 20.0,
        "All-MPNet_256": 30.0,
    }
}
100115

Lines changed: 130 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import os
22
import time
33
import pytest
4+
import logging
5+
import sys
46

57
from src.benchmark_metrics import (
68
PYTORCH_INFERENCE_GPU_THRESHOLD,
@@ -10,48 +12,122 @@
1012
from test.test_utils import (
1113
CONTAINER_TESTS_PREFIX,
1214
get_framework_and_version_from_tag,
13-
UL20_CPU_ARM64_US_WEST_2,
15+
UL22_BASE_ARM64_DLAMI_US_WEST_2,
1416
login_to_ecr_registry,
1517
get_account_id_from_image_uri,
1618
LOGGER,
1719
)
1820
from test.test_utils.ec2 import (
1921
ec2_performance_upload_result_to_s3_and_validate,
2022
post_process_inference,
23+
get_ec2_instance_type,
2124
)
2225

# Module-level logger for benchmark progress and diagnostics; emits to stderr
# so output is visible in the pytest / CodeBuild console.
# NOTE(review): this rebinding shadows the LOGGER imported from test.test_utils
# above — confirm that is intentional.
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.INFO)
# Guard against attaching a second StreamHandler if this module is imported
# more than once; duplicate handlers would double every log line.
if not LOGGER.handlers:
    LOGGER.addHandler(logging.StreamHandler(sys.stderr))
30+
31+
2332
# Benchmark driver script executed inside the DLC container under test.
PT_PERFORMANCE_INFERENCE_SCRIPT = os.path.join(
    CONTAINER_TESTS_PREFIX, "benchmark", "run_pytorch_inference_performance.py"
)
# Command lines passed to `docker exec ... python`; the CPU run uses fewer
# iterations than the GPU run.  (Trailing space on the CPU command is preserved
# from the original.)
PT_PERFORMANCE_INFERENCE_CPU_CMD = f"{PT_PERFORMANCE_INFERENCE_SCRIPT} --iterations 500 "
PT_PERFORMANCE_INFERENCE_GPU_CMD = f"{PT_PERFORMANCE_INFERENCE_SCRIPT} --iterations 1000 --gpu"
# Use the original p3.16xlarge instance; consider whether a single-GPU instance
# such as g4dn.4xlarge or g5.4xlarge would suffice.
PT_EC2_GPU_INSTANCE_TYPE = ["p3.16xlarge"]
PT_EC2_CPU_INSTANCE_TYPE = get_ec2_instance_type(default="c5.18xlarge", processor="cpu")
# c6g.4xlarge and c6g.8xlarge reach 100% CPU usage when the benchmark runs the
# VGG13 model, so larger Graviton generations are used instead.
PT_EC2_CPU_ARM64_INSTANCE_TYPES = ["c7g.4xlarge", "c8g.4xlarge", "m7g.4xlarge", "r8g.4xlarge"]
PT_EC2_GPU_ARM64_INSTANCE_TYPE = get_ec2_instance_type(
    default="g5g.4xlarge", processor="gpu", arch_type="arm64"
)
2845

2946

@pytest.mark.model(
    "VGG13, MobileNet_V2, GoogLeNet, DenseNet121, Inception_V3, ResNet18, ResNet50, ViT_B_16, Bert_128, Bert_256, Roberta_128, Roberta_256, DistilBert_128, DistilBert_256, All-MPNet_128, All-MPNet_256, ASR"
)
@pytest.mark.parametrize("ec2_instance_type", PT_EC2_GPU_INSTANCE_TYPE, indirect=True)
@pytest.mark.team("conda")
def test_performance_ec2_pytorch_inference_gpu(
    pytorch_inference, ec2_connection, region, gpu_only, ec2_instance_type
):
    """Run the synthetic PyTorch inference benchmark on a GPU EC2 instance and
    validate measured latencies against PYTORCH_INFERENCE_GPU_THRESHOLD."""
    _, fw_version = get_framework_and_version_from_tag(pytorch_inference)
    latency_thresholds = get_threshold_for_image(fw_version, PYTORCH_INFERENCE_GPU_THRESHOLD)
    ec2_performance_pytorch_inference(
        pytorch_inference,
        "gpu",
        ec2_instance_type,
        ec2_connection,
        region,
        PT_PERFORMANCE_INFERENCE_GPU_CMD,
        latency_thresholds,
    )
4466

4567

@pytest.mark.model(
    "ResNet18, MobileNet_V2, GoogLeNet, DenseNet121, Inception_V3, Bert_128, Roberta_128, DistilBert_128, All-MPNet_128, ASR"
)
@pytest.mark.parametrize("ec2_instance_type", PT_EC2_CPU_INSTANCE_TYPE, indirect=True)
@pytest.mark.team("conda")
def test_performance_ec2_pytorch_inference_cpu(
    pytorch_inference, ec2_connection, region, cpu_only, ec2_instance_type
):
    """Run the synthetic PyTorch inference benchmark on a CPU EC2 instance and
    validate measured latencies against PYTORCH_INFERENCE_CPU_THRESHOLD."""
    _, fw_version = get_framework_and_version_from_tag(pytorch_inference)
    latency_thresholds = get_threshold_for_image(fw_version, PYTORCH_INFERENCE_CPU_THRESHOLD)
    ec2_performance_pytorch_inference(
        pytorch_inference,
        "cpu",
        ec2_instance_type,
        ec2_connection,
        region,
        PT_PERFORMANCE_INFERENCE_CPU_CMD,
        latency_thresholds,
    )
87+
88+
89+
@pytest.mark.model(
    "VGG13, MobileNet_V2, GoogLeNet, DenseNet121, Inception_V3, ResNet18, ResNet50, ViT_B_16, Bert_128, Bert_256, Roberta_128, Roberta_256, DistilBert_128, DistilBert_256, All-MPNet_128, All-MPNet_256, ASR"
)
@pytest.mark.parametrize("ec2_instance_type", PT_EC2_GPU_ARM64_INSTANCE_TYPE, indirect=True)
@pytest.mark.parametrize("ec2_instance_ami", [UL22_BASE_ARM64_DLAMI_US_WEST_2], indirect=True)
@pytest.mark.team("conda")
def test_performance_ec2_pytorch_inference_arm64_gpu(
    pytorch_inference_arm64, ec2_connection, region, gpu_only, ec2_instance_type
):
    """Run the synthetic PyTorch inference benchmark on an arm64 GPU EC2 instance
    and validate measured latencies against PYTORCH_INFERENCE_GPU_THRESHOLD.

    Skips when the image under test is not an arm64 image.
    """
    # Guard clause first: skip before parsing the tag or looking up thresholds,
    # so non-arm64 images do no unnecessary work (original checked after).
    if "arm64" not in pytorch_inference_arm64:
        pytest.skip("skip benchmark tests for non-arm64 images")
    _, framework_version = get_framework_and_version_from_tag(pytorch_inference_arm64)
    threshold = get_threshold_for_image(framework_version, PYTORCH_INFERENCE_GPU_THRESHOLD)
    ec2_performance_pytorch_inference(
        pytorch_inference_arm64,
        "gpu",
        ec2_instance_type,
        ec2_connection,
        region,
        PT_PERFORMANCE_INFERENCE_GPU_CMD,
        threshold,
    )
111+
112+
113+
@pytest.mark.model(
114+
"ResNet18, MobileNet_V2, GoogLeNet, DenseNet121, Inception_V3, Bert_128, Roberta_128, DistilBert_128, All-MPNet_128, ASR"
115+
)
116+
@pytest.mark.parametrize("ec2_instance_type", PT_EC2_CPU_ARM64_INSTANCE_TYPES, indirect=True)
117+
@pytest.mark.parametrize("ec2_instance_ami", [UL22_BASE_ARM64_DLAMI_US_WEST_2], indirect=True)
118+
@pytest.mark.team("conda")
119+
def test_performance_ec2_pytorch_inference_arm64_cpu(
120+
pytorch_inference_arm64, ec2_connection, region, cpu_only, ec2_instance_type
121+
):
122+
_, framework_version = get_framework_and_version_from_tag(pytorch_inference_arm64)
123+
threshold = get_threshold_for_image(framework_version, PYTORCH_INFERENCE_CPU_THRESHOLD)
124+
if "arm64" not in pytorch_inference_arm64:
125+
pytest.skip("skip benchmark tests for non-arm64 images")
126+
127+
ec2_performance_pytorch_inference(
128+
pytorch_inference_arm64,
129+
"cpu",
130+
ec2_instance_type,
55131
ec2_connection,
56132
region,
57133
PT_PERFORMANCE_INFERENCE_CPU_CMD,
@@ -60,7 +136,7 @@ def test_performance_ec2_pytorch_inference_cpu(pytorch_inference, ec2_connection
60136

61137

62138
def ec2_performance_pytorch_inference(
63-
image_uri, processor, ec2_connection, region, test_cmd, threshold
139+
image_uri, processor, ec2_instance_type, ec2_connection, region, test_cmd, threshold
64140
):
65141
docker_runtime = "--runtime=nvidia --gpus all" if processor == "gpu" else ""
66142
container_test_local_dir = os.path.join("$HOME", "container_tests")
@@ -75,16 +151,52 @@ def ec2_performance_pytorch_inference(
75151
time_str = time.strftime("%Y-%m-%d-%H-%M-%S")
76152
commit_info = os.getenv("CODEBUILD_RESOLVED_SOURCE_VERSION")
77153
# Run performance inference command, display benchmark results to console
154+
78155
container_name = f"{repo_name}-performance-{image_tag}-ec2"
79156
log_file = f"synthetic_{commit_info}_{time_str}.log"
80-
ec2_connection.run(
81-
f"docker run {docker_runtime} -d --name {container_name} -e OMP_NUM_THREADS=1 "
82-
f"-v {container_test_local_dir}:{os.path.join(os.sep, 'test')} {image_uri} "
83-
)
84-
ec2_connection.run(
85-
f"docker exec {container_name} " f"python {test_cmd} " f"2>&1 | tee {log_file}"
86-
)
87-
ec2_connection.run(f"docker rm -f {container_name}")
157+
158+
try:
159+
ec2_connection.run(
160+
f"docker run {docker_runtime} -d --name {container_name} -e OMP_NUM_THREADS=1 "
161+
f"-v {container_test_local_dir}:{os.path.join(os.sep, 'test')} {image_uri}"
162+
)
163+
164+
except Exception as e:
165+
LOGGER.info(f"Failed to start container: {e}")
166+
return
167+
168+
try:
169+
ec2_connection.run(f"docker exec {container_name} pip install transformers", warn=True)
170+
171+
except Exception as e:
172+
LOGGER.info(f"Failed to install transformers: {e}")
173+
return
174+
175+
try:
176+
LOGGER.info(f"Starting benchmark test on {processor} {ec2_instance_type} instance...")
177+
result = ec2_connection.run(
178+
f"docker exec {container_name} python {test_cmd} --instance {ec2_instance_type} 2>&1 | tee {log_file}",
179+
timeout=3600,
180+
warn=True,
181+
)
182+
183+
# Check if the command was successful
184+
if result.failed:
185+
LOGGER.info(f"Command failed with exit code {result.return_code}")
186+
LOGGER.info(f"Error output:\n{result.stderr}")
187+
else:
188+
LOGGER.info("Command completed successfully")
189+
sys.stdout.flush()
190+
191+
except Exception as e:
192+
LOGGER.info(f"An error occurred during test execution: {e}")
193+
194+
finally:
195+
# This block will run regardless of whether an exception occurred
196+
LOGGER.info(f"Cleaning {processor} {ec2_instance_type} up...")
197+
198+
ec2_connection.run(f"docker rm -f {container_name}")
199+
88200
ec2_performance_upload_result_to_s3_and_validate(
89201
ec2_connection,
90202
image_uri,
@@ -93,4 +205,5 @@ def ec2_performance_pytorch_inference(
93205
threshold,
94206
post_process_inference,
95207
log_file,
208+
ec2_instance_type,
96209
)

test/dlc_tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,7 @@ def ec2_connection(request, ec2_instance, ec2_key_name, ec2_instance_type, regio
779779
user = ec2_utils.get_instance_user(instance_id, region=region)
780780

781781
LOGGER.info(f"Connecting to {user}@{ip_address}")
782+
782783
conn = Connection(
783784
user=user,
784785
host=ip_address,

0 commit comments

Comments
 (0)