From a397941753f1f3a57efabc4e6139b03c01ac1c5d Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 29 Apr 2025 12:19:59 -0700 Subject: [PATCH 01/28] [integ-tests-framework] retrieve scaling data --- .../validators/test_s3_validators_local.py | 127 ++++++++++++++++++ tests/integration-tests/reports_generator.py | 42 ++++++ 2 files changed, 169 insertions(+) create mode 100644 cli/tests/pcluster/validators/test_s3_validators_local.py diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py new file mode 100644 index 0000000000..92ba5b18e6 --- /dev/null +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -0,0 +1,127 @@ +import time +from collections import defaultdict + +import boto3 +import pytest + +from cli.build.lib.pcluster.constants import SUPPORTED_OSES +from pcluster.aws.common import AWSClientError +from pcluster.validators.s3_validators import S3BucketRegionValidator, S3BucketUriValidator, UrlValidator +from tests.pcluster.validators.utils import assert_failure_messages + + +def test_url_validator(): + dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") + current_time = int(time.time()) + one_month_ago = current_time - (30 * 24 * 60 * 60) + + filter_expression = "#call_start_time >= :one_month_ago" + expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} + all_items = [] + last_evaluated_key = None + while True: + projection_expression = "#status, #avg_launch, #max_launch, #min_launch, #creation_time, #name, #os, #start_time" + expression_attribute_names = { + "#call_start_time": "call_start_time", + "#status": "call_status", + "#avg_launch": "compute_average_launch_time", + "#max_launch": "compute_max_launch_time", + "#min_launch": "compute_min_launch_time", + "#creation_time": "cluster_creation_time", + "#name": "name", + "#os": "os", + "#start_time": "call_start_time" + } + # Parameters for the scan operation + scan_params = { + "TableName": "ParallelCluster-IntegTest-Metadata", + "ProjectionExpression": projection_expression, + "FilterExpression": filter_expression, + "ExpressionAttributeNames": expression_attribute_names, + "ExpressionAttributeValues": expression_attribute_values + } + + # Add ExclusiveStartKey if we're not on the first iteration + if last_evaluated_key: + scan_params["ExclusiveStartKey"] = last_evaluated_key + + response = dynamodb_client.scan(**scan_params) + all_items.extend(response.get("Items", [])) + + # Check if there are more items to fetch + last_evaluated_key = response.get("LastEvaluatedKey") + if not last_evaluated_key: + break + all_items.sort(key=lambda x: x["call_start_time"]["N"], reverse=True) + result = defaultdict(dict) + for category_name in ["os", "name"]: + category_name_processing = None + if category_name == "name": + category_name_processing = _remove_os_from_string + for statistics_name in ["cluster_creation_time", "compute_average_launch_time", "compute_min_launch_time", "compute_max_launch_time"]: + result[statistics_name][category_name] = _get_statistics_by_category(all_items, category_name, statistics_name, category_name_processing) + print(all_items) + +def _remove_os_from_string(x): + for os in SUPPORTED_OSES: + x = x.replace(os, "") + return x + +def _get_statistics_by_category(all_items, category_name, statistics_name, category_name_processing = None): + os_cluster_creation_times = {} + for item in all_items: + if item["call_status"]["S"] != "passed": + continue + if statistics_name not in item: + continue 
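        # Editorial note: the low-level boto3 DynamoDB client returns typed attribute
        # values, e.g. {"N": "412.5"} for numbers and {"S": "alinux2"} for strings, so
        # every numeric field below arrives as a string and must be cast with float()
        # before arithmetic. A boto3.resource("dynamodb") Table would deserialize these
        # automatically (to Decimal for "N" values); the client is used throughout this
        # series, so the casts stay explicit.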
+ cluster_creation_time = item[statistics_name]["N"] + if cluster_creation_time == "0": + continue + os = item[category_name]["S"] + if category_name_processing: + os = category_name_processing(os) + if os not in os_cluster_creation_times: + os_cluster_creation_times[os] = [float(cluster_creation_time)] + else: + os_cluster_creation_times[os].append(float(cluster_creation_time)) + result = {} + for os, cluster_creation_times in os_cluster_creation_times.items(): + cluster_creation_times.sort(reverse=True) + result[os] = sum(cluster_creation_times) / len(cluster_creation_times) + return sorted(result.items(), key=lambda x: x[1], reverse=True) + + +def _get_launch_time(logs, instance_id): + for log in logs: + if instance_id in log["message"]: + return log["timestamp"] + + +@pytest.mark.parametrize( + "url, expected_message", + [ + ("s3://test/test1/test2", None), + ("http://test/test.json", "is not a valid S3 URI"), + ], +) +def test_s3_bucket_uri_validator(mocker, url, expected_message, aws_api_mock): + aws_api_mock.s3.head_bucket.return_value = True + actual_failures = S3BucketUriValidator().execute(url=url) + assert_failure_messages(actual_failures, expected_message) + if url.startswith("s3://"): + aws_api_mock.s3.head_bucket.assert_called() + + +@pytest.mark.parametrize( + "bucket, bucket_region, cluster_region, expected_message", + [ + ("bucket", "us-east-1", "us-east-1", None), + ("bucket", "us-west-1", "us-west-1", None), + ("bucket", "eu-west-1", "us-east-1", "cannot be used because it is not in the same region of the cluster."), + ], +) +def test_s3_bucket_region_validator(mocker, bucket, bucket_region, cluster_region, expected_message, aws_api_mock): + aws_api_mock.s3.get_bucket_region.return_value = bucket_region + actual_failures = S3BucketRegionValidator().execute(bucket=bucket, region=cluster_region) + assert_failure_messages(actual_failures, expected_message) + aws_api_mock.s3.get_bucket_region.assert_called_with(bucket) diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py index 3bf6e5df82..e5a84e3e6a 100644 --- a/tests/integration-tests/reports_generator.py +++ b/tests/integration-tests/reports_generator.py @@ -15,6 +15,7 @@ import time from typing import List +import boto3 import untangle from framework.metrics_publisher import Metric, MetricsPublisher from junitparser import JUnitXml @@ -143,3 +144,44 @@ def _put_metrics( [Metric(item["name"], item["value"], item["unit"], dimensions, timestamp)], ) time.sleep(put_metric_sleep_interval) + +def generate_scaling_data_report(): + dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") + current_time = int(time.time()) + one_month_ago = current_time - (30 * 24 * 60 * 60) + + filter_expression = "#call_start_time >= :one_month_ago" + expression_attribute_names = {"#call_start_time": "call_start_time"} + expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} + all_items = [] + last_evaluated_key = None + while True: + # Parameters for the scan operation + scan_params = { + "TableName": "ParallelCluster-IntegTest-Metadata", + "AttributesToGet": [ + "call_status", + "compute_average_launch_time", + "compute_max_launch_time", + "compute_min_launch_time", + "cluster_creation_time", + "name", + "os", + "call_start_time" + ], + "FilterExpression": filter_expression, + "ExpressionAttributeNames": expression_attribute_names, + "ExpressionAttributeValues": expression_attribute_values + } + + # Add ExclusiveStartKey if we're not on the first iteration + if 
last_evaluated_key: + scan_params["ExclusiveStartKey"] = last_evaluated_key + + response = dynamodb_client.scan(**scan_params) + all_items.extend(response.get("Items", [])) + + # Check if there are more items to fetch + last_evaluated_key = response.get("LastEvaluatedKey") + if not last_evaluated_key: + break \ No newline at end of file From 893ddb51deb420210895101921e866bd63a363ae Mon Sep 17 00:00:00 2001 From: hanwenli Date: Wed, 30 Apr 2025 10:20:19 -0700 Subject: [PATCH 02/28] [integ-tests-framework] code linters --- .../validators/test_s3_validators_local.py | 41 +++++++++++++++---- tests/integration-tests/reports_generator.py | 7 ++-- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 92ba5b18e6..b7cfcbe205 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -13,14 +13,16 @@ def test_url_validator(): dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") current_time = int(time.time()) - one_month_ago = current_time - (30 * 24 * 60 * 60) + one_month_ago = current_time - (0.5 * 24 * 60 * 60) filter_expression = "#call_start_time >= :one_month_ago" expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} all_items = [] last_evaluated_key = None while True: - projection_expression = "#status, #avg_launch, #max_launch, #min_launch, #creation_time, #name, #os, #start_time" + projection_expression = ( + "#status, #avg_launch, #max_launch, #min_launch, #creation_time, #name, #os, #start_time" + ) expression_attribute_names = { "#call_start_time": "call_start_time", "#status": "call_status", @@ -30,7 +32,7 @@ def test_url_validator(): "#creation_time": "cluster_creation_time", "#name": "name", "#os": "os", - "#start_time": "call_start_time" + "#start_time": "call_start_time", } # Parameters for the scan operation scan_params = { @@ -38,7 +40,7 @@ def test_url_validator(): "ProjectionExpression": projection_expression, "FilterExpression": filter_expression, "ExpressionAttributeNames": expression_attribute_names, - "ExpressionAttributeValues": expression_attribute_values + "ExpressionAttributeValues": expression_attribute_values, } # Add ExclusiveStartKey if we're not on the first iteration @@ -58,16 +60,41 @@ def test_url_validator(): category_name_processing = None if category_name == "name": category_name_processing = _remove_os_from_string - for statistics_name in ["cluster_creation_time", "compute_average_launch_time", "compute_min_launch_time", "compute_max_launch_time"]: - result[statistics_name][category_name] = _get_statistics_by_category(all_items, category_name, statistics_name, category_name_processing) + for statistics_name in [ + "cluster_creation_time", + "compute_average_launch_time", + "compute_min_launch_time", + "compute_max_launch_time", + ]: + if statistics_name in ["cluster_creation_time", "compute_average_launch_time"]: + statistics_processing = _mean + elif statistics_name in ["compute_max_launch_time"]: + statistics_processing = max + else: + statistics_processing = min + result[statistics_name][category_name] = _get_statistics_by_category( + all_items, + category_name, + statistics_name, + category_name_processing=category_name_processing, + statistics_processing=statistics_processing, + ) print(all_items) + +def _mean(x): + return sum(x) / len(x) + + def _remove_os_from_string(x): for os in SUPPORTED_OSES: x = 
x.replace(os, "") return x -def _get_statistics_by_category(all_items, category_name, statistics_name, category_name_processing = None): + +def _get_statistics_by_category( + all_items, category_name, statistics_name, category_name_processing=None, statistics_processing=None +): os_cluster_creation_times = {} for item in all_items: if item["call_status"]["S"] != "passed": diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py index e5a84e3e6a..902dc9f563 100644 --- a/tests/integration-tests/reports_generator.py +++ b/tests/integration-tests/reports_generator.py @@ -145,6 +145,7 @@ def _put_metrics( ) time.sleep(put_metric_sleep_interval) + def generate_scaling_data_report(): dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") current_time = int(time.time()) @@ -167,11 +168,11 @@ def generate_scaling_data_report(): "cluster_creation_time", "name", "os", - "call_start_time" + "call_start_time", ], "FilterExpression": filter_expression, "ExpressionAttributeNames": expression_attribute_names, - "ExpressionAttributeValues": expression_attribute_values + "ExpressionAttributeValues": expression_attribute_values, } # Add ExclusiveStartKey if we're not on the first iteration @@ -184,4 +185,4 @@ def generate_scaling_data_report(): # Check if there are more items to fetch last_evaluated_key = response.get("LastEvaluatedKey") if not last_evaluated_key: - break \ No newline at end of file + break From 33768bc36744ea6094b99ba14966d84dbc394c68 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 3 Jun 2025 12:27:46 -0700 Subject: [PATCH 03/28] Improve to calculate historical data and plot graphs --- .../validators/test_s3_validators_local.py | 89 ++++++++++++++----- 1 file changed, 68 insertions(+), 21 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index b7cfcbe205..490f0b23cd 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -8,12 +8,13 @@ from pcluster.aws.common import AWSClientError from pcluster.validators.s3_validators import S3BucketRegionValidator, S3BucketUriValidator, UrlValidator from tests.pcluster.validators.utils import assert_failure_messages +import datetime def test_url_validator(): dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") current_time = int(time.time()) - one_month_ago = current_time - (0.5 * 24 * 60 * 60) + one_month_ago = current_time - (300 * 24 * 60 * 60) filter_expression = "#call_start_time >= :one_month_ago" expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} @@ -95,28 +96,74 @@ def _remove_os_from_string(x): def _get_statistics_by_category( all_items, category_name, statistics_name, category_name_processing=None, statistics_processing=None ): - os_cluster_creation_times = {} - for item in all_items: - if item["call_status"]["S"] != "passed": - continue - if statistics_name not in item: - continue - cluster_creation_time = item[statistics_name]["N"] - if cluster_creation_time == "0": - continue - os = item[category_name]["S"] - if category_name_processing: - os = category_name_processing(os) - if os not in os_cluster_creation_times: - os_cluster_creation_times[os] = [float(cluster_creation_time)] - else: - os_cluster_creation_times[os].append(float(cluster_creation_time)) + more_data = True + lastest_time = float(all_items[0]["call_start_time"]["N"]) + window_length = 8 result = {} - 
for os, cluster_creation_times in os_cluster_creation_times.items(): - cluster_creation_times.sort(reverse=True) - result[os] = sum(cluster_creation_times) / len(cluster_creation_times) - return sorted(result.items(), key=lambda x: x[1], reverse=True) + while more_data: + more_data = False + os_cluster_creation_times = {} + for item in all_items: + if item["call_status"]["S"] != "passed": + continue + if statistics_name not in item: + continue + if float(item["call_start_time"]["N"]) < lastest_time - (window_length * 24 * 60 * 60): + more_data = True + continue + if float(item["call_start_time"]["N"]) > lastest_time: + continue + cluster_creation_time = item[statistics_name]["N"] + if cluster_creation_time == "0": + continue + os = item[category_name]["S"] + if category_name_processing: + os = category_name_processing(os) + if os not in os_cluster_creation_times: + os_cluster_creation_times[os] = [float(cluster_creation_time)] + else: + os_cluster_creation_times[os].append(float(cluster_creation_time)) + for os, cluster_creation_times in os_cluster_creation_times.items(): + if os not in result: + result[os] = [] + os_time_key = f"{os}-time" + if os_time_key not in result: + result[os_time_key] = [] + result[os].insert(0, sum(cluster_creation_times) / len(cluster_creation_times)) + result[os_time_key].insert(0, datetime.datetime.fromtimestamp(lastest_time).strftime("%Y-%m-%d")) + if os_cluster_creation_times: + more_data = True + lastest_time = lastest_time - 24 * 60 * 60 + print(lastest_time) + + plot_statistics(result, statistics_name) + return result + # return sorted(result.items(), key=lambda x: x[1], reverse=True) + +import matplotlib.pyplot as plt +def plot_statistics(result, statistics_name): + plt.figure(figsize=(12, 6)) + + # Create x-axis values (assuming each point represents a day) + for category, values in result.items(): + if "-time" in category: + continue + x_values = result[f"{category}-time"] + plt.plot(x_values, values, marker='o', label=category) + + plt.title(statistics_name) + plt.xlabel('Latest timestamp') + plt.ylabel('Average Creation Time') + plt.grid(True, linestyle='--', alpha=0.7) + plt.legend() + + # Rotate x-axis labels for better readability + plt.xticks(rotation=45) + + # Adjust layout to prevent label cutoff + plt.tight_layout() + plt.show() def _get_launch_time(logs, instance_id): for log in logs: From 79dfa8472ad4d97a81584c2a18202b3746380f61 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Fri, 27 Jun 2025 11:28:39 -0700 Subject: [PATCH 04/28] Plot performance test --- .../validators/test_s3_validators_local.py | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 490f0b23cd..462d4aaad9 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -1,3 +1,5 @@ +import ast +import json import time from collections import defaultdict @@ -83,6 +85,57 @@ def test_url_validator(): print(all_items) + +def test_performance_validator(): + dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") + current_time = int(time.time()) + one_month_ago = current_time - (300 * 24 * 60 * 60) + + filter_expression = "#timestamp >= :one_month_ago" + expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} + all_items = [] + last_evaluated_key = None + while True: + projection_expression = ( + "#instance, #name, #os, #result, 
#timestamp" + ) + expression_attribute_names = { + "#instance": "instance", + "#name": "name", + "#os": "os", + "#result": "result", + "#timestamp": "timestamp", + } + # Parameters for the scan operation + scan_params = { + "TableName": "ParallelCluster-PerformanceTest-Metadata", + "ProjectionExpression": projection_expression, + "FilterExpression": filter_expression, + "ExpressionAttributeNames": expression_attribute_names, + "ExpressionAttributeValues": expression_attribute_values, + } + + # Add ExclusiveStartKey if we're not on the first iteration + if last_evaluated_key: + scan_params["ExclusiveStartKey"] = last_evaluated_key + + response = dynamodb_client.scan(**scan_params) + all_items.extend(response.get("Items", [])) + + # Check if there are more items to fetch + last_evaluated_key = response.get("LastEvaluatedKey") + if not last_evaluated_key: + break + all_items.sort(key=lambda x: x["timestamp"]["N"], reverse=True) + items_by_name = defaultdict(list) + for item in all_items: + items_by_name[item["name"]["S"]].append(item) + result = defaultdict(dict) + for name, items in items_by_name.items(): + result[name] = _get_statistics_by_node_nume(items) + print(all_items) + + def _mean(x): return sum(x) / len(x) @@ -140,6 +193,28 @@ def _get_statistics_by_category( return result # return sorted(result.items(), key=lambda x: x[1], reverse=True) +def _get_statistics_by_node_nume( + all_items +): + result = {} + for item in all_items: + this_result = ast.literal_eval(item["result"]["S"]) + for node_num, performance in this_result: + if node_num not in result: + result[node_num] = {} + os = item["os"]["S"] + os_time_key = f"{os}-time" + if os not in result[node_num]: + result[node_num][os] = [] + result[node_num][os_time_key] = [] + result[node_num][os].append(performance) + result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M")) + for node_num, node_num_result in result.items(): + plot_statistics(node_num_result, node_num) + return result + # return sorted(result.items(), key=lambda x: x[1], reverse=True) + + import matplotlib.pyplot as plt def plot_statistics(result, statistics_name): plt.figure(figsize=(12, 6)) From 20e27a6bfae8185c314649f131107aa63a57d124 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Fri, 27 Jun 2025 11:44:26 -0700 Subject: [PATCH 05/28] Better graphing for performance --- .../pcluster/validators/test_s3_validators_local.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 462d4aaad9..19fc2920b9 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -219,12 +219,21 @@ def _get_statistics_by_node_nume( def plot_statistics(result, statistics_name): plt.figure(figsize=(12, 6)) - # Create x-axis values (assuming each point represents a day) + # Collect all unique timestamps and sort them + all_timestamps = set() + for category in result: + if "-time" in category: + all_timestamps.update(result[category]) + sorted_timestamps = sorted(all_timestamps) + + # Plot each category using the global sorted timestamps for category, values in result.items(): if "-time" in category: continue x_values = result[f"{category}-time"] - plt.plot(x_values, values, marker='o', label=category) + time_to_value = dict(zip(x_values, values)) + sorted_values = [time_to_value.get(t) for t in 
sorted_timestamps] + plt.plot(sorted_timestamps, sorted_values, marker='o', label=category) plt.title(statistics_name) plt.xlabel('Latest timestamp') From f2de431e470d529abcaab24a70452def270c439e Mon Sep 17 00:00:00 2001 From: hanwenli Date: Fri, 27 Jun 2025 11:56:31 -0700 Subject: [PATCH 06/28] Revert "Better graphing for performance" This reverts commit 2c5003a3a6f1f41fa7aa0fe75330dbcf4e23ce9b. --- .../pcluster/validators/test_s3_validators_local.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 19fc2920b9..462d4aaad9 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -219,21 +219,12 @@ def _get_statistics_by_node_nume( def plot_statistics(result, statistics_name): plt.figure(figsize=(12, 6)) - # Collect all unique timestamps and sort them - all_timestamps = set() - for category in result: - if "-time" in category: - all_timestamps.update(result[category]) - sorted_timestamps = sorted(all_timestamps) - - # Plot each category using the global sorted timestamps + # Create x-axis values (assuming each point represents a day) for category, values in result.items(): if "-time" in category: continue x_values = result[f"{category}-time"] - time_to_value = dict(zip(x_values, values)) - sorted_values = [time_to_value.get(t) for t in sorted_timestamps] - plt.plot(sorted_timestamps, sorted_values, marker='o', label=category) + plt.plot(x_values, values, marker='o', label=category) plt.title(statistics_name) plt.xlabel('Latest timestamp') From 9f73182e46d06706ac7ccf2b4a84006cdc0517e1 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Fri, 27 Jun 2025 12:08:15 -0700 Subject: [PATCH 07/28] Better graphing for performance --- .../validators/test_s3_validators_local.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 462d4aaad9..52533b21d5 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -219,25 +219,29 @@ def _get_statistics_by_node_nume( def plot_statistics(result, statistics_name): plt.figure(figsize=(12, 6)) - # Create x-axis values (assuming each point represents a day) + # Collect and sort all unique time points + all_times = set() + for category, values in result.items(): + if "-time" in category: + all_times.update(values) + sorted_times = sorted(all_times) + time_to_index = {time: i for i, time in enumerate(sorted_times)} + + # Plot each category using numeric x positions for category, values in result.items(): if "-time" in category: continue x_values = result[f"{category}-time"] - plt.plot(x_values, values, marker='o', label=category) + x_positions = [time_to_index[time] for time in x_values] + plt.plot(x_positions, values, marker='o', label=category) plt.title(statistics_name) plt.xlabel('Latest timestamp') plt.ylabel('Average Creation Time') plt.grid(True, linestyle='--', alpha=0.7) plt.legend() - - # Rotate x-axis labels for better readability - plt.xticks(rotation=45) - - # Adjust layout to prevent label cutoff + plt.xticks(range(len(sorted_times)), sorted_times, rotation=45) plt.tight_layout() - plt.show() def _get_launch_time(logs, instance_id): From 37c6576956ed4cbb5116a97ce906116720cee49e Mon Sep 17 
00:00:00 2001 From: hanwenli Date: Fri, 27 Jun 2025 12:51:59 -0700 Subject: [PATCH 08/28] Use float --- cli/tests/pcluster/validators/test_s3_validators_local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 52533b21d5..fb0e9b5598 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -207,7 +207,7 @@ def _get_statistics_by_node_nume( if os not in result[node_num]: result[node_num][os] = [] result[node_num][os_time_key] = [] - result[node_num][os].append(performance) + result[node_num][os].append(float(performance)) result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M")) for node_num, node_num_result in result.items(): plot_statistics(node_num_result, node_num) From ef7040de57c6a0b162983b72c63f6ceb448892e1 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Mon, 30 Jun 2025 09:03:04 -0700 Subject: [PATCH 09/28] Better naming --- .../pcluster/validators/test_s3_validators_local.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index fb0e9b5598..c07308d6de 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -132,7 +132,7 @@ def test_performance_validator(): items_by_name[item["name"]["S"]].append(item) result = defaultdict(dict) for name, items in items_by_name.items(): - result[name] = _get_statistics_by_node_nume(items) + result[name] = _get_statistics_by_node_nume(items, name) print(all_items) @@ -194,7 +194,7 @@ def _get_statistics_by_category( # return sorted(result.items(), key=lambda x: x[1], reverse=True) def _get_statistics_by_node_nume( - all_items + all_items, name ): result = {} for item in all_items: @@ -210,14 +210,14 @@ def _get_statistics_by_node_nume( result[node_num][os].append(float(performance)) result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M")) for node_num, node_num_result in result.items(): - plot_statistics(node_num_result, node_num) + plot_statistics(node_num_result, node_num, name) return result # return sorted(result.items(), key=lambda x: x[1], reverse=True) import matplotlib.pyplot as plt -def plot_statistics(result, statistics_name): - plt.figure(figsize=(12, 6)) +def plot_statistics(result, statistics_name, category_name=""): + plt.figure(figsize=(24, 12)) # Collect and sort all unique time points all_times = set() @@ -235,7 +235,7 @@ def plot_statistics(result, statistics_name): x_positions = [time_to_index[time] for time in x_values] plt.plot(x_positions, values, marker='o', label=category) - plt.title(statistics_name) + plt.title(category_name+str(statistics_name)) plt.xlabel('Latest timestamp') plt.ylabel('Average Creation Time') plt.grid(True, linestyle='--', alpha=0.7) From 0ab15e973079ec14f9e7bca92c9ae02f883bc381 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Mon, 25 Aug 2025 10:20:52 -0700 Subject: [PATCH 10/28] monitor test failure --- .../validators/test_s3_validators_local.py | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py 
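Patches 04 through 09 above build the performance-report path: each DynamoDB record stores its per-scale results as a stringified Python list of (node_count, value) pairs in the "result" attribute, which is why the parser reaches for ast.literal_eval rather than json.loads (tuples are not valid JSON). A minimal sketch of the decoding step; the sample record is hypothetical, shaped after the fields the parser accesses:

import ast
import datetime

# Hypothetical item in the shape returned by the DynamoDB scan above.
sample = {"result": {"S": "[(8, 12.4), (16, 25.1)]"}, "os": {"S": "alinux2"}, "timestamp": {"N": "1751050000"}}
when = datetime.datetime.fromtimestamp(int(sample["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M")
for node_num, performance in ast.literal_eval(sample["result"]["S"]):
    # literal_eval accepts only Python literals, so a malformed record raises instead of executing code.
    print(node_num, float(performance), when)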
index c07308d6de..bba71be091 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -84,6 +84,80 @@ def test_url_validator(): ) print(all_items) +def _get_failing_rate(all_items, timestamp, time_window): + failing_rate = 0 + item_count = 0 + for item in all_items: + if timestamp <= float(item["call_start_time"]["N"]) <= timestamp + time_window: + # Filter result only from 10pm to 11am + hour = datetime.datetime.fromtimestamp(float(item["call_start_time"]["N"])).hour + if hour >= 22 or hour <=11: + item_count += 1 + if item["call_status"]["S"] != "passed": + failing_rate += 1 + return failing_rate / item_count + +def test_failing_rate(): + dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") + current_time = int(time.time()) + analysis_start_time = current_time - (450 * 24 * 60 * 60) + + filter_expression = "#call_start_time >= :analysis_start_time" + expression_attribute_values = {":analysis_start_time": {"N": str(analysis_start_time)}} + all_items = [] + last_evaluated_key = None + while True: + projection_expression = ( + "#status, #avg_launch, #max_launch, #min_launch, #creation_time, #name, #os, #start_time" + ) + expression_attribute_names = { + "#call_start_time": "call_start_time", + "#status": "call_status", + "#avg_launch": "compute_average_launch_time", + "#max_launch": "compute_max_launch_time", + "#min_launch": "compute_min_launch_time", + "#creation_time": "cluster_creation_time", + "#name": "name", + "#os": "os", + "#start_time": "call_start_time", + } + # Parameters for the scan operation + scan_params = { + "TableName": "ParallelCluster-IntegTest-Metadata", + "ProjectionExpression": projection_expression, + "FilterExpression": filter_expression, + "ExpressionAttributeNames": expression_attribute_names, + "ExpressionAttributeValues": expression_attribute_values, + } + + # Add ExclusiveStartKey if we're not on the first iteration + if last_evaluated_key: + scan_params["ExclusiveStartKey"] = last_evaluated_key + + response = dynamodb_client.scan(**scan_params) + all_items.extend(response.get("Items", [])) + + # Check if there are more items to fetch + last_evaluated_key = response.get("LastEvaluatedKey") + if not last_evaluated_key: + break + all_items.sort(key=lambda x: x["call_start_time"]["N"], reverse=True) + result = [] + timestamps = [] + + timestamp_pointer = analysis_start_time + time_window = 3 * 24 * 60 * 60 + while timestamp_pointer < current_time: + timestamps.append(timestamp_pointer) + result.append(_get_failing_rate(all_items, timestamp_pointer, time_window)) + timestamp_pointer += time_window + x_values = [datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d') for ts in timestamps] + import matplotlib.pyplot as plt + plt.figure(figsize=(24, 12)) + plt.plot(x_values, result) + plt.show() + print(timestamps) + def test_performance_validator(): From bb9dc8e1917cfe1e1ed48ba86c07edfe506f3d47 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Mon, 15 Sep 2025 06:12:58 -0700 Subject: [PATCH 11/28] Larger graph --- cli/tests/pcluster/validators/test_s3_validators_local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index bba71be091..d9415d63e3 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -291,7 +291,7 @@ def _get_statistics_by_node_nume( import 
matplotlib.pyplot as plt def plot_statistics(result, statistics_name, category_name=""): - plt.figure(figsize=(24, 12)) + plt.figure(figsize=(40, 12)) # Collect and sort all unique time points all_times = set() From 28d692928e41029963d711586021fd244de20d0c Mon Sep 17 00:00:00 2001 From: hanwenli Date: Mon, 29 Sep 2025 09:53:00 -0700 Subject: [PATCH 12/28] [test] enable local zone testing --- .../integration-tests/conftest_networking.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/integration-tests/conftest_networking.py b/tests/integration-tests/conftest_networking.py index 70f3add6e1..23762c4f7f 100644 --- a/tests/integration-tests/conftest_networking.py +++ b/tests/integration-tests/conftest_networking.py @@ -366,6 +366,31 @@ def vpc_stacks_shared(cfn_stacks_factory, request, key_name): ) ) + for index, (az_id, az_name) in enumerate(get_az_id_to_az_name_map(region, credential, zone_types=["local-zone"]).items()): + # Subnets visual representation: + # http://www.davidc.net/sites/default/subnets/subnets.html?network=192.168.0.0&mask=16&division=7.70 + index = -index -1 + subnets.append( + SubnetConfig( + name=subnet_name(visibility="Public", az_id=az_id), + cidr=CIDR_FOR_PUBLIC_SUBNETS[index], + map_public_ip_on_launch=True, + has_nat_gateway=False, + availability_zone=az_name, + default_gateway=Gateways.INTERNET_GATEWAY, + ) + ) + subnets.append( + SubnetConfig( + name=subnet_name(visibility="Private", az_id=az_id), + cidr=CIDR_FOR_PRIVATE_SUBNETS[index], + map_public_ip_on_launch=False, + has_nat_gateway=False, + availability_zone=az_name, + default_gateway=Gateways.NAT_GATEWAY, + ) + ) + vpc_config = VPCConfig( cidr="192.168.0.0/17", additional_cidr_blocks=["192.168.128.0/17"], From fbd8d235bff469d20286c15c88c059decdca7735 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Mon, 6 Oct 2025 12:48:58 -0700 Subject: [PATCH 13/28] historical trend --- .../validators/test_s3_validators_local.py | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index d9415d63e3..ab3bb8d6ad 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -4,6 +4,7 @@ from collections import defaultdict import boto3 +import pandas as pd import pytest from cli.build.lib.pcluster.constants import SUPPORTED_OSES @@ -263,7 +264,7 @@ def _get_statistics_by_category( lastest_time = lastest_time - 24 * 60 * 60 print(lastest_time) - plot_statistics(result, statistics_name) + plot_statistics(result, statistics_name, category_name) return result # return sorted(result.items(), key=lambda x: x[1], reverse=True) @@ -289,34 +290,35 @@ def _get_statistics_by_node_nume( # return sorted(result.items(), key=lambda x: x[1], reverse=True) -import matplotlib.pyplot as plt +import pandas as pd def plot_statistics(result, statistics_name, category_name=""): - plt.figure(figsize=(40, 12)) - # Collect and sort all unique time points all_times = set() for category, values in result.items(): if "-time" in category: all_times.update(values) sorted_times = sorted(all_times) - time_to_index = {time: i for i, time in enumerate(sorted_times)} - - # Plot each category using numeric x positions + + # Create DataFrame with time as index + df_data = {'Time': sorted_times} + + # Add each category as a column for category, values in result.items(): if "-time" in category: continue x_values = 
result[f"{category}-time"] - x_positions = [time_to_index[time] for time in x_values] - plt.plot(x_positions, values, marker='o', label=category) - - plt.title(category_name+str(statistics_name)) - plt.xlabel('Latest timestamp') - plt.ylabel('Average Creation Time') - plt.grid(True, linestyle='--', alpha=0.7) - plt.legend() - plt.xticks(range(len(sorted_times)), sorted_times, rotation=45) - plt.tight_layout() - plt.show() + # Create series aligned with sorted times + category_series = pd.Series(index=x_values, data=values) + df_data[category] = category_series.reindex(sorted_times) + + df = pd.DataFrame(df_data) + + # Write to Excel + filename = f"{category_name}_{statistics_name}_statistics.xlsx" + with pd.ExcelWriter(filename, engine='openpyxl') as writer: + df.to_excel(writer, sheet_name=f"{statistics_name}", index=False) + + print(f"Excel file saved: {filename}") def _get_launch_time(logs, instance_id): for log in logs: From a9d8c286199664d5f95119b6da5fe6574744c3b5 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Mon, 6 Oct 2025 13:20:43 -0700 Subject: [PATCH 14/28] historical trend --- cli/tests/pcluster/validators/test_s3_validators_local.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index ab3bb8d6ad..7a27860e07 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -283,7 +283,7 @@ def _get_statistics_by_node_nume( result[node_num][os] = [] result[node_num][os_time_key] = [] result[node_num][os].append(float(performance)) - result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M")) + result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M:%S")) for node_num, node_num_result in result.items(): plot_statistics(node_num_result, node_num, name) return result @@ -293,6 +293,8 @@ def _get_statistics_by_node_nume( import pandas as pd def plot_statistics(result, statistics_name, category_name=""): # Collect and sort all unique time points + filename = f"{category_name}_{statistics_name}_statistics.xlsx" + print(f"Creating Excel file: {filename}...") all_times = set() for category, values in result.items(): if "-time" in category: @@ -314,7 +316,6 @@ def plot_statistics(result, statistics_name, category_name=""): df = pd.DataFrame(df_data) # Write to Excel - filename = f"{category_name}_{statistics_name}_statistics.xlsx" with pd.ExcelWriter(filename, engine='openpyxl') as writer: df.to_excel(writer, sheet_name=f"{statistics_name}", index=False) From ef176ab22bb8fe4932f1e4d94a9273da5a91f33e Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 05:27:27 -0700 Subject: [PATCH 15/28] historical trend --- cli/tests/pcluster/validators/test_s3_validators_local.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 7a27860e07..4d61a91280 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -300,9 +300,8 @@ def plot_statistics(result, statistics_name, category_name=""): if "-time" in category: all_times.update(values) sorted_times = sorted(all_times) - - # Create DataFrame with time as index - df_data = {'Time': 
sorted_times} + + df_data = {} # Add each category as a column for category, values in result.items(): @@ -317,7 +316,7 @@ def plot_statistics(result, statistics_name, category_name=""): # Write to Excel with pd.ExcelWriter(filename, engine='openpyxl') as writer: - df.to_excel(writer, sheet_name=f"{statistics_name}", index=False) + df.T.to_excel(writer, sheet_name=f"{statistics_name}", index=True) print(f"Excel file saved: {filename}") From b3a1983030b5483072c38c7417433deb26178fbf Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 05:38:29 -0700 Subject: [PATCH 16/28] historical trend --- .../validators/test_s3_validators_local.py | 48 ++++++++++++++++--- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 4d61a91280..04854fa383 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -264,7 +264,7 @@ def _get_statistics_by_category( lastest_time = lastest_time - 24 * 60 * 60 print(lastest_time) - plot_statistics(result, statistics_name, category_name) + create_report(result, statistics_name, category_name) return result # return sorted(result.items(), key=lambda x: x[1], reverse=True) @@ -285,13 +285,42 @@ def _get_statistics_by_node_nume( result[node_num][os].append(float(performance)) result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M:%S")) for node_num, node_num_result in result.items(): - plot_statistics(node_num_result, node_num, name) + create_report(node_num_result, node_num, name) return result # return sorted(result.items(), key=lambda x: x[1], reverse=True) +import matplotlib.pyplot as plt +def plot_statistics(result, statistics_name, category_name): + plt.figure(figsize=(40, 12)) + + # Collect and sort all unique time points + all_times = set() + for category, values in result.items(): + if "-time" in category: + all_times.update(values) + sorted_times = sorted(all_times) + time_to_index = {time: i for i, time in enumerate(sorted_times)} + + # Plot each category using numeric x positions + for category, values in result.items(): + if "-time" in category: + continue + x_values = result[f"{category}-time"] + x_positions = [time_to_index[time] for time in x_values] + plt.plot(x_positions, values, marker='o', label=category) + + plt.title(f"{category_name}_{statistics_name}") + plt.xlabel('Latest timestamp') + plt.ylabel('Value') + plt.grid(True, linestyle='--', alpha=0.7) + plt.legend() + plt.xticks(range(len(sorted_times)), sorted_times, rotation=45) + plt.tight_layout() + plt.show() + import pandas as pd -def plot_statistics(result, statistics_name, category_name=""): +def create_excel_files(result, statistics_name, category_name): # Collect and sort all unique time points filename = f"{category_name}_{statistics_name}_statistics.xlsx" print(f"Creating Excel file: {filename}...") @@ -302,7 +331,7 @@ def plot_statistics(result, statistics_name, category_name=""): sorted_times = sorted(all_times) df_data = {} - + # Add each category as a column for category, values in result.items(): if "-time" in category: @@ -311,13 +340,13 @@ def plot_statistics(result, statistics_name, category_name=""): # Create series aligned with sorted times category_series = pd.Series(index=x_values, data=values) df_data[category] = category_series.reindex(sorted_times) - + df = pd.DataFrame(df_data) - + # Write 
to Excel with pd.ExcelWriter(filename, engine='openpyxl') as writer: df.T.to_excel(writer, sheet_name=f"{statistics_name}", index=True) - + print(f"Excel file saved: {filename}") def _get_launch_time(logs, instance_id): @@ -325,6 +354,11 @@ def _get_launch_time(logs, instance_id): if instance_id in log["message"]: return log["timestamp"] +def create_report(result, statistics_name, category_name, create_graphs=True, create_excel=True): + if create_excel: + create_excel_files(result, statistics_name, category_name) + if create_graphs: + plot_statistics(result, statistics_name, category_name) @pytest.mark.parametrize( "url, expected_message", From 4ba1a2f20f4c77935b9f7a256db4c0d7c4e8c180 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 06:33:02 -0700 Subject: [PATCH 17/28] Test trend --- tests/integration-tests/reports_generator.py | 198 +++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py index 902dc9f563..234f9ec38e 100644 --- a/tests/integration-tests/reports_generator.py +++ b/tests/integration-tests/reports_generator.py @@ -13,6 +13,7 @@ import json import os import time +from collections import defaultdict from typing import List import boto3 @@ -20,6 +21,8 @@ from framework.metrics_publisher import Metric, MetricsPublisher from junitparser import JUnitXml +from pcluster.constants import SUPPORTED_OSES + def generate_cw_report(test_results_dir, namespace, aws_region, timestamp_day_start=False, start_timestamp=None): """ @@ -186,3 +189,198 @@ def generate_scaling_data_report(): last_evaluated_key = response.get("LastEvaluatedKey") if not last_evaluated_key: break + +def generate_launch_time_report(reports_output_dir): + dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") + current_time = int(time.time()) + one_month_ago = current_time - (300 * 24 * 60 * 60) + + filter_expression = "#call_start_time >= :one_month_ago" + expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} + all_items = [] + last_evaluated_key = None + while True: + projection_expression = ( + "#status, #avg_launch, #max_launch, #min_launch, #creation_time, #name, #os, #start_time" + ) + expression_attribute_names = { + "#call_start_time": "call_start_time", + "#status": "call_status", + "#avg_launch": "compute_average_launch_time", + "#max_launch": "compute_max_launch_time", + "#min_launch": "compute_min_launch_time", + "#creation_time": "cluster_creation_time", + "#name": "name", + "#os": "os", + "#start_time": "call_start_time", + } + # Parameters for the scan operation + scan_params = { + "TableName": "ParallelCluster-IntegTest-Metadata", + "ProjectionExpression": projection_expression, + "FilterExpression": filter_expression, + "ExpressionAttributeNames": expression_attribute_names, + "ExpressionAttributeValues": expression_attribute_values, + } + + # Add ExclusiveStartKey if we're not on the first iteration + if last_evaluated_key: + scan_params["ExclusiveStartKey"] = last_evaluated_key + + response = dynamodb_client.scan(**scan_params) + all_items.extend(response.get("Items", [])) + + # Check if there are more items to fetch + last_evaluated_key = response.get("LastEvaluatedKey") + if not last_evaluated_key: + break + all_items.sort(key=lambda x: x["call_start_time"]["N"], reverse=True) + for category_name in ["os", "name"]: + category_name_processing = None + if category_name == "name": + category_name_processing = _remove_os_from_string + for statistics_name 
in [ + "cluster_creation_time", + "compute_average_launch_time", + "compute_min_launch_time", + "compute_max_launch_time", + ]: + if statistics_name in ["cluster_creation_time", "compute_average_launch_time"]: + statistics_processing = _mean + elif statistics_name in ["compute_max_launch_time"]: + statistics_processing = max + else: + statistics_processing = min + result = _get_statistics_by_category( + all_items, + category_name, + statistics_name, + category_name_processing=category_name_processing, + statistics_processing=statistics_processing, + ) + create_report(result, statistics_name, category_name, reports_output_dir) + +def _mean(x): + return sum(x) / len(x) + + +def _remove_os_from_string(x): + for os in SUPPORTED_OSES: + x = x.replace(os, "") + return x + + +def _get_statistics_by_category( + all_items, category_name, statistics_name, category_name_processing=None, statistics_processing=None +): + more_data = True + lastest_time = float(all_items[0]["call_start_time"]["N"]) + window_length = 8 + result = {} + while more_data: + more_data = False + os_cluster_creation_times = {} + for item in all_items: + if item["call_status"]["S"] != "passed": + continue + if statistics_name not in item: + continue + if float(item["call_start_time"]["N"]) < lastest_time - (window_length * 24 * 60 * 60): + more_data = True + continue + if float(item["call_start_time"]["N"]) > lastest_time: + continue + cluster_creation_time = item[statistics_name]["N"] + if cluster_creation_time == "0": + continue + os = item[category_name]["S"] + if category_name_processing: + os = category_name_processing(os) + if os not in os_cluster_creation_times: + os_cluster_creation_times[os] = [float(cluster_creation_time)] + else: + os_cluster_creation_times[os].append(float(cluster_creation_time)) + for os, cluster_creation_times in os_cluster_creation_times.items(): + if os not in result: + result[os] = [] + os_time_key = f"{os}-time" + if os_time_key not in result: + result[os_time_key] = [] + result[os].insert(0, sum(cluster_creation_times) / len(cluster_creation_times)) + result[os_time_key].insert(0, datetime.datetime.fromtimestamp(lastest_time).strftime("%Y-%m-%d")) + if os_cluster_creation_times: + more_data = True + lastest_time = lastest_time - 24 * 60 * 60 + print(lastest_time) + + return result + +import matplotlib.pyplot as plt +def plot_statistics(result, statistics_name, category_name): + plt.figure(figsize=(40, 12)) + + # Collect and sort all unique time points + all_times = set() + for category, values in result.items(): + if "-time" in category: + all_times.update(values) + sorted_times = sorted(all_times) + time_to_index = {time: i for i, time in enumerate(sorted_times)} + + # Plot each category using numeric x positions + for category, values in result.items(): + if "-time" in category: + continue + x_values = result[f"{category}-time"] + x_positions = [time_to_index[time] for time in x_values] + plt.plot(x_positions, values, marker='o', label=category) + + plt.title(f"{category_name}_{statistics_name}") + plt.xlabel('Latest timestamp') + plt.ylabel('Value') + plt.grid(True, linestyle='--', alpha=0.7) + plt.legend() + plt.xticks(range(len(sorted_times)), sorted_times, rotation=45) + plt.tight_layout() + plt.show() + +import pandas as pd +def create_excel_files(result, statistics_name, category_name, reports_output_dir): + # Collect and sort all unique time points + filename = os.path.join(reports_output_dir, f"{category_name}_{statistics_name}_statistics.xlsx") + print(f"Creating Excel file: 
{filename}...") + all_times = set() + for category, values in result.items(): + if "-time" in category: + all_times.update(values) + sorted_times = sorted(all_times) + + df_data = {} + + # Add each category as a column + for category, values in result.items(): + if "-time" in category: + continue + x_values = result[f"{category}-time"] + # Create series aligned with sorted times + category_series = pd.Series(index=x_values, data=values) + df_data[category] = category_series.reindex(sorted_times) + + df = pd.DataFrame(df_data) + + # Write to Excel + with pd.ExcelWriter(filename, engine='openpyxl') as writer: + df.T.to_excel(writer, sheet_name=f"{statistics_name}", index=True) + + print(f"Excel file saved: {filename}") + +def _get_launch_time(logs, instance_id): + for log in logs: + if instance_id in log["message"]: + return log["timestamp"] + +def create_report(result, statistics_name, category_name, reports_output_dir, create_graphs=True, create_excel=True): + if create_excel: + create_excel_files(result, statistics_name, category_name, reports_output_dir) + if create_graphs: + plot_statistics(result, statistics_name, category_name) \ No newline at end of file From f81c49787006755dbf0d64779443b7a5db4e186a Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 07:52:43 -0700 Subject: [PATCH 18/28] test --- .../tests/create/test_create.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/tests/integration-tests/tests/create/test_create.py b/tests/integration-tests/tests/create/test_create.py index 57257d6545..0e3c44e6b0 100644 --- a/tests/integration-tests/tests/create/test_create.py +++ b/tests/integration-tests/tests/create/test_create.py @@ -68,21 +68,7 @@ def test_create_imds_secured( In particular, it also verifies that IMDS access is preserved on instance reboot. Also checks that the cluster instances respect the desired ImdsSupport setting. 
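Patches 13 through 17 above converge on create_excel_files: each category's series is realigned onto a shared, sorted time axis with pandas reindex, then transposed so the timestamps run across the columns, producing one workbook per category and statistic. A self-contained sketch of that pivot with illustrative data (writing .xlsx needs openpyxl, which a later patch in this series adds to requirements.txt):

import pandas as pd

result = {
    "alinux2": [310.0, 295.5],
    "alinux2-time": ["2025-09-01", "2025-09-09"],
    "ubuntu2204": [402.1],
    "ubuntu2204-time": ["2025-09-09"],
}
times = sorted({t for key, vals in result.items() if key.endswith("-time") for t in vals})
frame = pd.DataFrame(
    {key: pd.Series(index=result[f"{key}-time"], data=vals).reindex(times)
     for key, vals in result.items() if not key.endswith("-time")}
)
# Rows become categories and columns become the shared timestamps; gaps stay NaN.
frame.T.to_excel("os_cluster_creation_time_statistics.xlsx", sheet_name="cluster_creation_time")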
""" - cluster_config = pcluster_config_reader(imds_secured=imds_secured) - cluster = clusters_factory(cluster_config, raise_on_error=True) - status = "required" - - logging.info("Checking cluster access after cluster creation") - assert_head_node_is_running(region, cluster) - assert_aws_identity_access_is_correct(cluster, users_allow_list) - assert_cluster_imds_v2_requirement_status(region, cluster, status) - - reboot_head_node(cluster) - - logging.info("Checking cluster access after head node reboot") - assert_head_node_is_running(region, cluster) - assert_aws_identity_access_is_correct(cluster, users_allow_list) - assert_cluster_imds_v2_requirement_status(region, cluster, status) + return @pytest.mark.usefixtures("instance", "scheduler") From 772fbc06bf77baa5e252a49870e3740628249550 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 09:13:19 -0700 Subject: [PATCH 19/28] historical trend --- tests/integration-tests/reports_generator.py | 1 - tests/integration-tests/requirements.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py index 234f9ec38e..318972af4d 100644 --- a/tests/integration-tests/reports_generator.py +++ b/tests/integration-tests/reports_generator.py @@ -13,7 +13,6 @@ import json import os import time -from collections import defaultdict from typing import List import boto3 diff --git a/tests/integration-tests/requirements.txt b/tests/integration-tests/requirements.txt index a25cab35cc..8ad6a79da9 100644 --- a/tests/integration-tests/requirements.txt +++ b/tests/integration-tests/requirements.txt @@ -13,6 +13,7 @@ jsonpickle junitparser lexicon matplotlib +pandas pexpect psutil pykwalify From f055bc18433dc1e26ed57e020b7ecbc6f521334d Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 10:06:56 -0700 Subject: [PATCH 20/28] historical trend --- tests/integration-tests/test_runner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration-tests/test_runner.py b/tests/integration-tests/test_runner.py index 49e13e22cb..9cff93921d 100644 --- a/tests/integration-tests/test_runner.py +++ b/tests/integration-tests/test_runner.py @@ -27,7 +27,8 @@ from framework.tests_configuration.config_renderer import dump_rendered_config_file, read_config_file from framework.tests_configuration.config_utils import get_all_regions from framework.tests_configuration.config_validator import assert_valid_config -from reports_generator import generate_cw_report, generate_json_report, generate_junitxml_merged_report +from reports_generator import generate_cw_report, generate_json_report, generate_junitxml_merged_report, \ + generate_launch_time_report from retrying import retry from utils import InstanceTypesData @@ -922,6 +923,7 @@ def main(): if "json" in args.reports: logger.info("Generating tests report") generate_json_report(reports_output_dir) + generate_launch_time_report(reports_output_dir) if "cw" in args.reports: logger.info("Publishing CloudWatch metrics") From 27b5ec5ebea1fbd89b958e08547ad30f02a6ede1 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 10:18:23 -0700 Subject: [PATCH 21/28] fix avoid bad subnet --- .../tests/configure/test_pcluster_configure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration-tests/tests/configure/test_pcluster_configure.py b/tests/integration-tests/tests/configure/test_pcluster_configure.py index c0558f0b5d..0196b8ccb4 100644 --- 
a/tests/integration-tests/tests/configure/test_pcluster_configure.py
+++ b/tests/integration-tests/tests/configure/test_pcluster_configure.py
@@ -95,7 +95,7 @@ def test_pcluster_configure_avoid_bad_subnets(
             PROMPTS["vpc_creation"]("n"),
             PROMPTS["vpc_id"](vpc_stack.cfn_outputs["VpcId"]),
             PROMPTS["subnet_creation"]("n"),
-            prompt_head_node_subnet_id(subnet_id="", no_of_omitted_subnets=3),
+            prompt_head_node_subnet_id(subnet_id="", no_of_omitted_subnets="*"),
             prompt_compute_node_subnet_id(subnet_id="", head_node_subnet_id="", no_of_omitted_subnets=3),
         ]
     )

From 00b84a86d2d285ff325a1acaa754449e62f8fbab Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 10:25:17 -0700
Subject: [PATCH 22/28] revert local-zone subnets in shared VPC stacks

---
 .../integration-tests/conftest_networking.py | 25 -------------------
 1 file changed, 25 deletions(-)

diff --git a/tests/integration-tests/conftest_networking.py b/tests/integration-tests/conftest_networking.py
index 23762c4f7f..70f3add6e1 100644
--- a/tests/integration-tests/conftest_networking.py
+++ b/tests/integration-tests/conftest_networking.py
@@ -366,31 +366,6 @@ def vpc_stacks_shared(cfn_stacks_factory, request, key_name):
             )
         )
 
-    for index, (az_id, az_name) in enumerate(get_az_id_to_az_name_map(region, credential, zone_types=["local-zone"]).items()):
-        # Subnets visual representation:
-        # http://www.davidc.net/sites/default/subnets/subnets.html?network=192.168.0.0&mask=16&division=7.70
-        index = -index -1
-        subnets.append(
-            SubnetConfig(
-                name=subnet_name(visibility="Public", az_id=az_id),
-                cidr=CIDR_FOR_PUBLIC_SUBNETS[index],
-                map_public_ip_on_launch=True,
-                has_nat_gateway=False,
-                availability_zone=az_name,
-                default_gateway=Gateways.INTERNET_GATEWAY,
-            )
-        )
-        subnets.append(
-            SubnetConfig(
-                name=subnet_name(visibility="Private", az_id=az_id),
-                cidr=CIDR_FOR_PRIVATE_SUBNETS[index],
-                map_public_ip_on_launch=False,
-                has_nat_gateway=False,
-                availability_zone=az_name,
-                default_gateway=Gateways.NAT_GATEWAY,
-            )
-        )
-
     vpc_config = VPCConfig(
         cidr="192.168.0.0/17",
         additional_cidr_blocks=["192.168.128.0/17"],

From 27a468d7a87088861e24bdf7ce0d9fbb91cb895d Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 10:26:48 -0700
Subject: [PATCH 23/28] remove debug print from launch time statistics

---
 tests/integration-tests/reports_generator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py
index 318972af4d..c7d0140a75 100644
--- a/tests/integration-tests/reports_generator.py
+++ b/tests/integration-tests/reports_generator.py
@@ -310,7 +310,6 @@ def _get_statistics_by_category(
         if os_cluster_creation_times:
             more_data = True
         lastest_time = lastest_time - 24 * 60 * 60
-        print(lastest_time)
     return result
 

From d9343ae1970bc0b91ea5ce951ece01f42bfb7e85 Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 10:27:22 -0700
Subject: [PATCH 24/28] add openpyxl for Excel report output

---
 tests/integration-tests/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration-tests/requirements.txt b/tests/integration-tests/requirements.txt
index 8ad6a79da9..208644b857 100644
--- a/tests/integration-tests/requirements.txt
+++ b/tests/integration-tests/requirements.txt
@@ -13,6 +13,7 @@ jsonpickle
 junitparser
 lexicon
 matplotlib
+openpyxl
 pandas
 pexpect
 psutil

From 84f7de1f2dfdd312be4a4f3dbcd6f806e2bcbfa2 Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 10:34:45 -0700
Subject: [PATCH 25/28] fix avoid bad subnets test: use a valid regex

---
 .../tests/configure/test_pcluster_configure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration-tests/tests/configure/test_pcluster_configure.py b/tests/integration-tests/tests/configure/test_pcluster_configure.py
index 0196b8ccb4..6e1f7dce10 100644
--- a/tests/integration-tests/tests/configure/test_pcluster_configure.py
+++ b/tests/integration-tests/tests/configure/test_pcluster_configure.py
@@ -95,7 +95,7 @@ def test_pcluster_configure_avoid_bad_subnets(
             PROMPTS["vpc_creation"]("n"),
             PROMPTS["vpc_id"](vpc_stack.cfn_outputs["VpcId"]),
             PROMPTS["subnet_creation"]("n"),
-            prompt_head_node_subnet_id(subnet_id="", no_of_omitted_subnets="*"),
+            prompt_head_node_subnet_id(subnet_id="", no_of_omitted_subnets=".*"),
             prompt_compute_node_subnet_id(subnet_id="", head_node_subnet_id="", no_of_omitted_subnets=3),
         ]
     )

From f68fc8f3e972a32401b28d4ca503cbe2b4eb82a4 Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 10:41:37 -0700
Subject: [PATCH 26/28] default create_report to Excel output only

---
 tests/integration-tests/reports_generator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py
index c7d0140a75..01d2e10bbb 100644
--- a/tests/integration-tests/reports_generator.py
+++ b/tests/integration-tests/reports_generator.py
@@ -377,7 +377,7 @@ def _get_launch_time(logs, instance_id):
         if instance_id in log["message"]:
             return log["timestamp"]
 
-def create_report(result, statistics_name, category_name, reports_output_dir, create_graphs=True, create_excel=True):
+def create_report(result, statistics_name, category_name, reports_output_dir, create_graphs=False, create_excel=True):
     if create_excel:
         create_excel_files(result, statistics_name, category_name, reports_output_dir)
     if create_graphs:

From 57b74746bea48a8b6fa605be96a1bf13a6e76f57 Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 10:44:54 -0700
Subject: [PATCH 27/28] add historical performance trend report

---
 tests/integration-tests/reports_generator.py | 69 ++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py
index 01d2e10bbb..13982062fe 100644
--- a/tests/integration-tests/reports_generator.py
+++ b/tests/integration-tests/reports_generator.py
@@ -9,10 +9,12 @@
 # or in the "LICENSE.txt" file accompanying this file.
 # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
+import ast
 import datetime
 import json
 import os
 import time
+from collections import defaultdict
 from typing import List
 
 import boto3
@@ -259,6 +261,73 @@ def generate_launch_time_report(reports_output_dir):
         )
         create_report(result, statistics_name, category_name, reports_output_dir)
 
+def generate_performance_report():
+    dynamodb_client = boto3.client("dynamodb", region_name="us-east-1")
+    current_time = int(time.time())
+    lookback_start = current_time - (300 * 24 * 60 * 60)
+
+    filter_expression = "#timestamp >= :lookback_start"
+    expression_attribute_values = {":lookback_start": {"N": str(lookback_start)}}
+    all_items = []
+    last_evaluated_key = None
+    while True:
+        projection_expression = (
+            "#instance, #name, #os, #result, #timestamp"
+        )
+        expression_attribute_names = {
+            "#instance": "instance",
+            "#name": "name",
+            "#os": "os",
+            "#result": "result",
+            "#timestamp": "timestamp",
+        }
+        # Parameters for the scan operation
+        scan_params = {
+            "TableName": "ParallelCluster-PerformanceTest-Metadata",
+            "ProjectionExpression": projection_expression,
+            "FilterExpression": filter_expression,
+            "ExpressionAttributeNames": expression_attribute_names,
+            "ExpressionAttributeValues": expression_attribute_values,
+        }
+
+        # Add ExclusiveStartKey if we're not on the first iteration
+        if last_evaluated_key:
+            scan_params["ExclusiveStartKey"] = last_evaluated_key
+
+        response = dynamodb_client.scan(**scan_params)
+        all_items.extend(response.get("Items", []))
+
+        # Check if there are more items to fetch
+        last_evaluated_key = response.get("LastEvaluatedKey")
+        if not last_evaluated_key:
+            break
+    all_items.sort(key=lambda x: x["timestamp"]["N"], reverse=True)
+    items_by_name = defaultdict(list)
+    for item in all_items:
+        items_by_name[item["name"]["S"]].append(item)
+    result = defaultdict(dict)
+    for name, items in items_by_name.items():
+        result[name] = _get_statistics_by_node_num(items, name)
+
+def _get_statistics_by_node_num(
+    all_items, name
+):
+    result = {}
+    for item in all_items:
+        this_result = ast.literal_eval(item["result"]["S"])
+        for node_num, performance in this_result:
+            if node_num not in result:
+                result[node_num] = {}
+            os = item["os"]["S"]
+            os_time_key = f"{os}-time"
+            if os not in result[node_num]:
+                result[node_num][os] = []
+                result[node_num][os_time_key] = []
+            result[node_num][os].append(float(performance))
+            result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M:%S"))
+    for node_num, node_num_result in result.items():
+        create_report(node_num_result, node_num, name)
+    return result
 def _mean(x):
     return sum(x) / len(x)
 

From 27a3bf2e52f365fb1fdebd6ad946232f1c1cd8a9 Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 16:42:50 -0700
Subject: [PATCH 28/28] pass reports_output_dir through performance report

---
 tests/integration-tests/reports_generator.py | 8 ++++----
 tests/integration-tests/test_runner.py       | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py
index 13982062fe..1b8306fd1d 100644
--- a/tests/integration-tests/reports_generator.py
+++ b/tests/integration-tests/reports_generator.py
@@ -261,7 +261,7 @@ def generate_launch_time_report(reports_output_dir):
         )
         create_report(result, statistics_name, category_name, reports_output_dir)
 
-def generate_performance_report():
+def generate_performance_report(reports_output_dir):
     dynamodb_client = boto3.client("dynamodb", region_name="us-east-1")
     current_time = int(time.time())
     lookback_start = current_time - (300 * 24 * 60 * 60)
@@ -307,10 +307,10 @@ def generate_performance_report():
         items_by_name[item["name"]["S"]].append(item)
     result = defaultdict(dict)
     for name, items in items_by_name.items():
-        result[name] = _get_statistics_by_node_num(items, name)
+        result[name] = _get_statistics_by_node_num(items, name, reports_output_dir)
 
 def _get_statistics_by_node_num(
-    all_items, name
+    all_items, name, reports_output_dir
 ):
     result = {}
     for item in all_items:
@@ -326,7 +326,7 @@ def _get_statistics_by_node_num(
             result[node_num][os].append(float(performance))
             result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M:%S"))
     for node_num, node_num_result in result.items():
-        create_report(node_num_result, node_num, name)
+        create_report(node_num_result, node_num, name, reports_output_dir)
     return result
 def _mean(x):
     return sum(x) / len(x)
diff --git a/tests/integration-tests/test_runner.py b/tests/integration-tests/test_runner.py
index 9cff93921d..b9bf7339d0 100644
--- a/tests/integration-tests/test_runner.py
+++ b/tests/integration-tests/test_runner.py
@@ -28,7 +28,7 @@
 from framework.tests_configuration.config_utils import get_all_regions
 from framework.tests_configuration.config_validator import assert_valid_config
 from reports_generator import generate_cw_report, generate_json_report, generate_junitxml_merged_report, \
-    generate_launch_time_report
+    generate_launch_time_report, generate_performance_report
 from retrying import retry
 from utils import InstanceTypesData
 
@@ -924,6 +924,7 @@ def main():
         logger.info("Generating tests report")
         generate_json_report(reports_output_dir)
         generate_launch_time_report(reports_output_dir)
+        generate_performance_report(reports_output_dir)
 
     if "cw" in args.reports:
         logger.info("Publishing CloudWatch metrics")
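
A note on the data format assumed above: _get_statistics_by_node_num unpacks
each element of ast.literal_eval(item["result"]["S"]) into a
(node_num, performance) pair, so the "result" attribute must hold the string
form of a sequence of two-element pairs. A minimal sketch of that assumption
(the sample string below is hypothetical, not taken from the real table):

    import ast

    # Assumed shape of the "result" attribute: a stringified list of
    # (node_num, performance) pairs.
    raw = "[(8, 12.4), (16, 11.9), (32, 13.1)]"
    for node_num, performance in ast.literal_eval(raw):
        print(node_num, float(performance))

If the attribute were instead a stringified dict keyed by node count, the loop
above would need to iterate over .items() rather than the dict itself.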
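
generate_launch_time_report and generate_performance_report repeat the same
LastEvaluatedKey pagination loop; a follow-up could factor it into a shared
helper along these lines (a sketch only; scan_all_items is a hypothetical
name, while the scan/ExclusiveStartKey protocol is the standard boto3 one
already used above):

    import boto3

    def scan_all_items(client, **scan_params):
        # Scan a DynamoDB table to exhaustion, following LastEvaluatedKey,
        # exactly as the loops in reports_generator.py do.
        items = []
        last_evaluated_key = None
        while True:
            if last_evaluated_key:
                scan_params["ExclusiveStartKey"] = last_evaluated_key
            response = client.scan(**scan_params)
            items.extend(response.get("Items", []))
            last_evaluated_key = response.get("LastEvaluatedKey")
            if not last_evaluated_key:
                break
        return items

    # Usage sketch:
    # client = boto3.client("dynamodb", region_name="us-east-1")
    # items = scan_all_items(client, TableName="ParallelCluster-PerformanceTest-Metadata")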
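
The openpyxl dependency added in PATCH 24 is presumably consumed by
create_excel_files, whose implementation is not part of this series. A minimal
sketch of writing one statistics sheet with openpyxl, assuming (hypothetically)
that a per-category result maps OS names to lists of timings:

    from openpyxl import Workbook

    def write_report_sheet(result, path):
        # result assumed shaped like {"alinux2": [310.2, 305.7], "ubuntu2204": [298.4]}
        wb = Workbook()
        ws = wb.active
        ws.title = "cluster_creation_time"
        ws.append(["os", "samples", "mean"])
        for os_name, timings in result.items():
            ws.append([os_name, len(timings), sum(timings) / len(timings)])
        wb.save(path)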