From a397941753f1f3a57efabc4e6139b03c01ac1c5d Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 29 Apr 2025 12:19:59 -0700 Subject: [PATCH 01/28] [integ-tests-framework] retrieve scaling data --- .../validators/test_s3_validators_local.py | 127 ++++++++++++++++++ tests/integration-tests/reports_generator.py | 42 ++++++ 2 files changed, 169 insertions(+) create mode 100644 cli/tests/pcluster/validators/test_s3_validators_local.py diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py new file mode 100644 index 0000000000..92ba5b18e6 --- /dev/null +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -0,0 +1,127 @@ +import time +from collections import defaultdict + +import boto3 +import pytest + +from cli.build.lib.pcluster.constants import SUPPORTED_OSES +from pcluster.aws.common import AWSClientError +from pcluster.validators.s3_validators import S3BucketRegionValidator, S3BucketUriValidator, UrlValidator +from tests.pcluster.validators.utils import assert_failure_messages + + +def test_url_validator(): + dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") + current_time = int(time.time()) + one_month_ago = current_time - (30 * 24 * 60 * 60) + + filter_expression = "#call_start_time >= :one_month_ago" + expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} + all_items = [] + last_evaluated_key = None + while True: + projection_expression = "#status, #avg_launch, #max_launch, #min_launch, #creation_time, #name, #os, #start_time" + expression_attribute_names = { + "#call_start_time": "call_start_time", + "#status": "call_status", + "#avg_launch": "compute_average_launch_time", + "#max_launch": "compute_max_launch_time", + "#min_launch": "compute_min_launch_time", + "#creation_time": "cluster_creation_time", + "#name": "name", + "#os": "os", + "#start_time": "call_start_time" + } + # Parameters for the scan operation + scan_params = { + "TableName": "ParallelCluster-IntegTest-Metadata", + "ProjectionExpression": projection_expression, + "FilterExpression": filter_expression, + "ExpressionAttributeNames": expression_attribute_names, + "ExpressionAttributeValues": expression_attribute_values + } + + # Add ExclusiveStartKey if we're not on the first iteration + if last_evaluated_key: + scan_params["ExclusiveStartKey"] = last_evaluated_key + + response = dynamodb_client.scan(**scan_params) + all_items.extend(response.get("Items", [])) + + # Check if there are more items to fetch + last_evaluated_key = response.get("LastEvaluatedKey") + if not last_evaluated_key: + break + all_items.sort(key=lambda x: x["call_start_time"]["N"], reverse=True) + result = defaultdict(dict) + for category_name in ["os", "name"]: + category_name_processing = None + if category_name == "name": + category_name_processing = _remove_os_from_string + for statistics_name in ["cluster_creation_time", "compute_average_launch_time", "compute_min_launch_time", "compute_max_launch_time"]: + result[statistics_name][category_name] = _get_statistics_by_category(all_items, category_name, statistics_name, category_name_processing) + print(all_items) + +def _remove_os_from_string(x): + for os in SUPPORTED_OSES: + x = x.replace(os, "") + return x + +def _get_statistics_by_category(all_items, category_name, statistics_name, category_name_processing = None): + os_cluster_creation_times = {} + for item in all_items: + if item["call_status"]["S"] != "passed": + continue + if statistics_name not in item: + continue 
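        # Editorial note: the low-level boto3 DynamoDB client returns typed attribute
        # values, e.g. {"N": "412.5"} for numbers and {"S": "alinux2"} for strings, so
        # every numeric field below arrives as a string and must be cast with float()
        # before arithmetic. A boto3.resource("dynamodb") Table would deserialize these
        # automatically (to Decimal for "N" values); the client is used throughout this
        # series, so the casts stay explicit.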
+ cluster_creation_time = item[statistics_name]["N"] + if cluster_creation_time == "0": + continue + os = item[category_name]["S"] + if category_name_processing: + os = category_name_processing(os) + if os not in os_cluster_creation_times: + os_cluster_creation_times[os] = [float(cluster_creation_time)] + else: + os_cluster_creation_times[os].append(float(cluster_creation_time)) + result = {} + for os, cluster_creation_times in os_cluster_creation_times.items(): + cluster_creation_times.sort(reverse=True) + result[os] = sum(cluster_creation_times) / len(cluster_creation_times) + return sorted(result.items(), key=lambda x: x[1], reverse=True) + + +def _get_launch_time(logs, instance_id): + for log in logs: + if instance_id in log["message"]: + return log["timestamp"] + + +@pytest.mark.parametrize( + "url, expected_message", + [ + ("s3://test/test1/test2", None), + ("http://test/test.json", "is not a valid S3 URI"), + ], +) +def test_s3_bucket_uri_validator(mocker, url, expected_message, aws_api_mock): + aws_api_mock.s3.head_bucket.return_value = True + actual_failures = S3BucketUriValidator().execute(url=url) + assert_failure_messages(actual_failures, expected_message) + if url.startswith("s3://"): + aws_api_mock.s3.head_bucket.assert_called() + + +@pytest.mark.parametrize( + "bucket, bucket_region, cluster_region, expected_message", + [ + ("bucket", "us-east-1", "us-east-1", None), + ("bucket", "us-west-1", "us-west-1", None), + ("bucket", "eu-west-1", "us-east-1", "cannot be used because it is not in the same region of the cluster."), + ], +) +def test_s3_bucket_region_validator(mocker, bucket, bucket_region, cluster_region, expected_message, aws_api_mock): + aws_api_mock.s3.get_bucket_region.return_value = bucket_region + actual_failures = S3BucketRegionValidator().execute(bucket=bucket, region=cluster_region) + assert_failure_messages(actual_failures, expected_message) + aws_api_mock.s3.get_bucket_region.assert_called_with(bucket) diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py index 3bf6e5df82..e5a84e3e6a 100644 --- a/tests/integration-tests/reports_generator.py +++ b/tests/integration-tests/reports_generator.py @@ -15,6 +15,7 @@ import time from typing import List +import boto3 import untangle from framework.metrics_publisher import Metric, MetricsPublisher from junitparser import JUnitXml @@ -143,3 +144,44 @@ def _put_metrics( [Metric(item["name"], item["value"], item["unit"], dimensions, timestamp)], ) time.sleep(put_metric_sleep_interval) + +def generate_scaling_data_report(): + dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") + current_time = int(time.time()) + one_month_ago = current_time - (30 * 24 * 60 * 60) + + filter_expression = "#call_start_time >= :one_month_ago" + expression_attribute_names = {"#call_start_time": "call_start_time"} + expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} + all_items = [] + last_evaluated_key = None + while True: + # Parameters for the scan operation + scan_params = { + "TableName": "ParallelCluster-IntegTest-Metadata", + "AttributesToGet": [ + "call_status", + "compute_average_launch_time", + "compute_max_launch_time", + "compute_min_launch_time", + "cluster_creation_time", + "name", + "os", + "call_start_time" + ], + "FilterExpression": filter_expression, + "ExpressionAttributeNames": expression_attribute_names, + "ExpressionAttributeValues": expression_attribute_values + } + + # Add ExclusiveStartKey if we're not on the first iteration + if 
last_evaluated_key: + scan_params["ExclusiveStartKey"] = last_evaluated_key + + response = dynamodb_client.scan(**scan_params) + all_items.extend(response.get("Items", [])) + + # Check if there are more items to fetch + last_evaluated_key = response.get("LastEvaluatedKey") + if not last_evaluated_key: + break \ No newline at end of file From 893ddb51deb420210895101921e866bd63a363ae Mon Sep 17 00:00:00 2001 From: hanwenli Date: Wed, 30 Apr 2025 10:20:19 -0700 Subject: [PATCH 02/28] [integ-tests-framework] code linters --- .../validators/test_s3_validators_local.py | 41 +++++++++++++++---- tests/integration-tests/reports_generator.py | 7 ++-- 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 92ba5b18e6..b7cfcbe205 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -13,14 +13,16 @@ def test_url_validator(): dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") current_time = int(time.time()) - one_month_ago = current_time - (30 * 24 * 60 * 60) + one_month_ago = current_time - (0.5 * 24 * 60 * 60) filter_expression = "#call_start_time >= :one_month_ago" expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} all_items = [] last_evaluated_key = None while True: - projection_expression = "#status, #avg_launch, #max_launch, #min_launch, #creation_time, #name, #os, #start_time" + projection_expression = ( + "#status, #avg_launch, #max_launch, #min_launch, #creation_time, #name, #os, #start_time" + ) expression_attribute_names = { "#call_start_time": "call_start_time", "#status": "call_status", @@ -30,7 +32,7 @@ def test_url_validator(): "#creation_time": "cluster_creation_time", "#name": "name", "#os": "os", - "#start_time": "call_start_time" + "#start_time": "call_start_time", } # Parameters for the scan operation scan_params = { @@ -38,7 +40,7 @@ def test_url_validator(): "ProjectionExpression": projection_expression, "FilterExpression": filter_expression, "ExpressionAttributeNames": expression_attribute_names, - "ExpressionAttributeValues": expression_attribute_values + "ExpressionAttributeValues": expression_attribute_values, } # Add ExclusiveStartKey if we're not on the first iteration @@ -58,16 +60,41 @@ def test_url_validator(): category_name_processing = None if category_name == "name": category_name_processing = _remove_os_from_string - for statistics_name in ["cluster_creation_time", "compute_average_launch_time", "compute_min_launch_time", "compute_max_launch_time"]: - result[statistics_name][category_name] = _get_statistics_by_category(all_items, category_name, statistics_name, category_name_processing) + for statistics_name in [ + "cluster_creation_time", + "compute_average_launch_time", + "compute_min_launch_time", + "compute_max_launch_time", + ]: + if statistics_name in ["cluster_creation_time", "compute_average_launch_time"]: + statistics_processing = _mean + elif statistics_name in ["compute_max_launch_time"]: + statistics_processing = max + else: + statistics_processing = min + result[statistics_name][category_name] = _get_statistics_by_category( + all_items, + category_name, + statistics_name, + category_name_processing=category_name_processing, + statistics_processing=statistics_processing, + ) print(all_items) + +def _mean(x): + return sum(x) / len(x) + + def _remove_os_from_string(x): for os in SUPPORTED_OSES: x = 
x.replace(os, "") return x -def _get_statistics_by_category(all_items, category_name, statistics_name, category_name_processing = None): + +def _get_statistics_by_category( + all_items, category_name, statistics_name, category_name_processing=None, statistics_processing=None +): os_cluster_creation_times = {} for item in all_items: if item["call_status"]["S"] != "passed": diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py index e5a84e3e6a..902dc9f563 100644 --- a/tests/integration-tests/reports_generator.py +++ b/tests/integration-tests/reports_generator.py @@ -145,6 +145,7 @@ def _put_metrics( ) time.sleep(put_metric_sleep_interval) + def generate_scaling_data_report(): dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") current_time = int(time.time()) @@ -167,11 +168,11 @@ def generate_scaling_data_report(): "cluster_creation_time", "name", "os", - "call_start_time" + "call_start_time", ], "FilterExpression": filter_expression, "ExpressionAttributeNames": expression_attribute_names, - "ExpressionAttributeValues": expression_attribute_values + "ExpressionAttributeValues": expression_attribute_values, } # Add ExclusiveStartKey if we're not on the first iteration @@ -184,4 +185,4 @@ def generate_scaling_data_report(): # Check if there are more items to fetch last_evaluated_key = response.get("LastEvaluatedKey") if not last_evaluated_key: - break \ No newline at end of file + break From 33768bc36744ea6094b99ba14966d84dbc394c68 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 3 Jun 2025 12:27:46 -0700 Subject: [PATCH 03/28] Improve to calculate historical data and plot graphs --- .../validators/test_s3_validators_local.py | 89 ++++++++++++++----- 1 file changed, 68 insertions(+), 21 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index b7cfcbe205..490f0b23cd 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -8,12 +8,13 @@ from pcluster.aws.common import AWSClientError from pcluster.validators.s3_validators import S3BucketRegionValidator, S3BucketUriValidator, UrlValidator from tests.pcluster.validators.utils import assert_failure_messages +import datetime def test_url_validator(): dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") current_time = int(time.time()) - one_month_ago = current_time - (0.5 * 24 * 60 * 60) + one_month_ago = current_time - (300 * 24 * 60 * 60) filter_expression = "#call_start_time >= :one_month_ago" expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} @@ -95,28 +96,74 @@ def _remove_os_from_string(x): def _get_statistics_by_category( all_items, category_name, statistics_name, category_name_processing=None, statistics_processing=None ): - os_cluster_creation_times = {} - for item in all_items: - if item["call_status"]["S"] != "passed": - continue - if statistics_name not in item: - continue - cluster_creation_time = item[statistics_name]["N"] - if cluster_creation_time == "0": - continue - os = item[category_name]["S"] - if category_name_processing: - os = category_name_processing(os) - if os not in os_cluster_creation_times: - os_cluster_creation_times[os] = [float(cluster_creation_time)] - else: - os_cluster_creation_times[os].append(float(cluster_creation_time)) + more_data = True + lastest_time = float(all_items[0]["call_start_time"]["N"]) + window_length = 8 result = {} - 
for os, cluster_creation_times in os_cluster_creation_times.items(): - cluster_creation_times.sort(reverse=True) - result[os] = sum(cluster_creation_times) / len(cluster_creation_times) - return sorted(result.items(), key=lambda x: x[1], reverse=True) + while more_data: + more_data = False + os_cluster_creation_times = {} + for item in all_items: + if item["call_status"]["S"] != "passed": + continue + if statistics_name not in item: + continue + if float(item["call_start_time"]["N"]) < lastest_time - (window_length * 24 * 60 * 60): + more_data = True + continue + if float(item["call_start_time"]["N"]) > lastest_time: + continue + cluster_creation_time = item[statistics_name]["N"] + if cluster_creation_time == "0": + continue + os = item[category_name]["S"] + if category_name_processing: + os = category_name_processing(os) + if os not in os_cluster_creation_times: + os_cluster_creation_times[os] = [float(cluster_creation_time)] + else: + os_cluster_creation_times[os].append(float(cluster_creation_time)) + for os, cluster_creation_times in os_cluster_creation_times.items(): + if os not in result: + result[os] = [] + os_time_key = f"{os}-time" + if os_time_key not in result: + result[os_time_key] = [] + result[os].insert(0, sum(cluster_creation_times) / len(cluster_creation_times)) + result[os_time_key].insert(0, datetime.datetime.fromtimestamp(lastest_time).strftime("%Y-%m-%d")) + if os_cluster_creation_times: + more_data = True + lastest_time = lastest_time - 24 * 60 * 60 + print(lastest_time) + + plot_statistics(result, statistics_name) + return result + # return sorted(result.items(), key=lambda x: x[1], reverse=True) + +import matplotlib.pyplot as plt +def plot_statistics(result, statistics_name): + plt.figure(figsize=(12, 6)) + + # Create x-axis values (assuming each point represents a day) + for category, values in result.items(): + if "-time" in category: + continue + x_values = result[f"{category}-time"] + plt.plot(x_values, values, marker='o', label=category) + + plt.title(statistics_name) + plt.xlabel('Latest timestamp') + plt.ylabel('Average Creation Time') + plt.grid(True, linestyle='--', alpha=0.7) + plt.legend() + + # Rotate x-axis labels for better readability + plt.xticks(rotation=45) + + # Adjust layout to prevent label cutoff + plt.tight_layout() + plt.show() def _get_launch_time(logs, instance_id): for log in logs: From 79dfa8472ad4d97a81584c2a18202b3746380f61 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Fri, 27 Jun 2025 11:28:39 -0700 Subject: [PATCH 04/28] Plot performance test --- .../validators/test_s3_validators_local.py | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 490f0b23cd..462d4aaad9 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -1,3 +1,5 @@ +import ast +import json import time from collections import defaultdict @@ -83,6 +85,57 @@ def test_url_validator(): print(all_items) + +def test_performance_validator(): + dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") + current_time = int(time.time()) + one_month_ago = current_time - (300 * 24 * 60 * 60) + + filter_expression = "#timestamp >= :one_month_ago" + expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} + all_items = [] + last_evaluated_key = None + while True: + projection_expression = ( + "#instance, #name, #os, #result, 
#timestamp" + ) + expression_attribute_names = { + "#instance": "instance", + "#name": "name", + "#os": "os", + "#result": "result", + "#timestamp": "timestamp", + } + # Parameters for the scan operation + scan_params = { + "TableName": "ParallelCluster-PerformanceTest-Metadata", + "ProjectionExpression": projection_expression, + "FilterExpression": filter_expression, + "ExpressionAttributeNames": expression_attribute_names, + "ExpressionAttributeValues": expression_attribute_values, + } + + # Add ExclusiveStartKey if we're not on the first iteration + if last_evaluated_key: + scan_params["ExclusiveStartKey"] = last_evaluated_key + + response = dynamodb_client.scan(**scan_params) + all_items.extend(response.get("Items", [])) + + # Check if there are more items to fetch + last_evaluated_key = response.get("LastEvaluatedKey") + if not last_evaluated_key: + break + all_items.sort(key=lambda x: x["timestamp"]["N"], reverse=True) + items_by_name = defaultdict(list) + for item in all_items: + items_by_name[item["name"]["S"]].append(item) + result = defaultdict(dict) + for name, items in items_by_name.items(): + result[name] = _get_statistics_by_node_nume(items) + print(all_items) + + def _mean(x): return sum(x) / len(x) @@ -140,6 +193,28 @@ def _get_statistics_by_category( return result # return sorted(result.items(), key=lambda x: x[1], reverse=True) +def _get_statistics_by_node_nume( + all_items +): + result = {} + for item in all_items: + this_result = ast.literal_eval(item["result"]["S"]) + for node_num, performance in this_result: + if node_num not in result: + result[node_num] = {} + os = item["os"]["S"] + os_time_key = f"{os}-time" + if os not in result[node_num]: + result[node_num][os] = [] + result[node_num][os_time_key] = [] + result[node_num][os].append(performance) + result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M")) + for node_num, node_num_result in result.items(): + plot_statistics(node_num_result, node_num) + return result + # return sorted(result.items(), key=lambda x: x[1], reverse=True) + + import matplotlib.pyplot as plt def plot_statistics(result, statistics_name): plt.figure(figsize=(12, 6)) From 20e27a6bfae8185c314649f131107aa63a57d124 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Fri, 27 Jun 2025 11:44:26 -0700 Subject: [PATCH 05/28] Better graphing for performance --- .../pcluster/validators/test_s3_validators_local.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 462d4aaad9..19fc2920b9 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -219,12 +219,21 @@ def _get_statistics_by_node_nume( def plot_statistics(result, statistics_name): plt.figure(figsize=(12, 6)) - # Create x-axis values (assuming each point represents a day) + # Collect all unique timestamps and sort them + all_timestamps = set() + for category in result: + if "-time" in category: + all_timestamps.update(result[category]) + sorted_timestamps = sorted(all_timestamps) + + # Plot each category using the global sorted timestamps for category, values in result.items(): if "-time" in category: continue x_values = result[f"{category}-time"] - plt.plot(x_values, values, marker='o', label=category) + time_to_value = dict(zip(x_values, values)) + sorted_values = [time_to_value.get(t) for t in 
sorted_timestamps] + plt.plot(sorted_timestamps, sorted_values, marker='o', label=category) plt.title(statistics_name) plt.xlabel('Latest timestamp') From f2de431e470d529abcaab24a70452def270c439e Mon Sep 17 00:00:00 2001 From: hanwenli Date: Fri, 27 Jun 2025 11:56:31 -0700 Subject: [PATCH 06/28] Revert "Better graphing for performance" This reverts commit 2c5003a3a6f1f41fa7aa0fe75330dbcf4e23ce9b. --- .../pcluster/validators/test_s3_validators_local.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 19fc2920b9..462d4aaad9 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -219,21 +219,12 @@ def _get_statistics_by_node_nume( def plot_statistics(result, statistics_name): plt.figure(figsize=(12, 6)) - # Collect all unique timestamps and sort them - all_timestamps = set() - for category in result: - if "-time" in category: - all_timestamps.update(result[category]) - sorted_timestamps = sorted(all_timestamps) - - # Plot each category using the global sorted timestamps + # Create x-axis values (assuming each point represents a day) for category, values in result.items(): if "-time" in category: continue x_values = result[f"{category}-time"] - time_to_value = dict(zip(x_values, values)) - sorted_values = [time_to_value.get(t) for t in sorted_timestamps] - plt.plot(sorted_timestamps, sorted_values, marker='o', label=category) + plt.plot(x_values, values, marker='o', label=category) plt.title(statistics_name) plt.xlabel('Latest timestamp') From 9f73182e46d06706ac7ccf2b4a84006cdc0517e1 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Fri, 27 Jun 2025 12:08:15 -0700 Subject: [PATCH 07/28] Better graphing for performance --- .../validators/test_s3_validators_local.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 462d4aaad9..52533b21d5 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -219,25 +219,29 @@ def _get_statistics_by_node_nume( def plot_statistics(result, statistics_name): plt.figure(figsize=(12, 6)) - # Create x-axis values (assuming each point represents a day) + # Collect and sort all unique time points + all_times = set() + for category, values in result.items(): + if "-time" in category: + all_times.update(values) + sorted_times = sorted(all_times) + time_to_index = {time: i for i, time in enumerate(sorted_times)} + + # Plot each category using numeric x positions for category, values in result.items(): if "-time" in category: continue x_values = result[f"{category}-time"] - plt.plot(x_values, values, marker='o', label=category) + x_positions = [time_to_index[time] for time in x_values] + plt.plot(x_positions, values, marker='o', label=category) plt.title(statistics_name) plt.xlabel('Latest timestamp') plt.ylabel('Average Creation Time') plt.grid(True, linestyle='--', alpha=0.7) plt.legend() - - # Rotate x-axis labels for better readability - plt.xticks(rotation=45) - - # Adjust layout to prevent label cutoff + plt.xticks(range(len(sorted_times)), sorted_times, rotation=45) plt.tight_layout() - plt.show() def _get_launch_time(logs, instance_id): From 37c6576956ed4cbb5116a97ce906116720cee49e Mon Sep 17 
00:00:00 2001 From: hanwenli Date: Fri, 27 Jun 2025 12:51:59 -0700 Subject: [PATCH 08/28] Use float --- cli/tests/pcluster/validators/test_s3_validators_local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 52533b21d5..fb0e9b5598 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -207,7 +207,7 @@ def _get_statistics_by_node_nume( if os not in result[node_num]: result[node_num][os] = [] result[node_num][os_time_key] = [] - result[node_num][os].append(performance) + result[node_num][os].append(float(performance)) result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M")) for node_num, node_num_result in result.items(): plot_statistics(node_num_result, node_num) From ef7040de57c6a0b162983b72c63f6ceb448892e1 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Mon, 30 Jun 2025 09:03:04 -0700 Subject: [PATCH 09/28] Better naming --- .../pcluster/validators/test_s3_validators_local.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index fb0e9b5598..c07308d6de 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -132,7 +132,7 @@ def test_performance_validator(): items_by_name[item["name"]["S"]].append(item) result = defaultdict(dict) for name, items in items_by_name.items(): - result[name] = _get_statistics_by_node_nume(items) + result[name] = _get_statistics_by_node_nume(items, name) print(all_items) @@ -194,7 +194,7 @@ def _get_statistics_by_category( # return sorted(result.items(), key=lambda x: x[1], reverse=True) def _get_statistics_by_node_nume( - all_items + all_items, name ): result = {} for item in all_items: @@ -210,14 +210,14 @@ def _get_statistics_by_node_nume( result[node_num][os].append(float(performance)) result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M")) for node_num, node_num_result in result.items(): - plot_statistics(node_num_result, node_num) + plot_statistics(node_num_result, node_num, name) return result # return sorted(result.items(), key=lambda x: x[1], reverse=True) import matplotlib.pyplot as plt -def plot_statistics(result, statistics_name): - plt.figure(figsize=(12, 6)) +def plot_statistics(result, statistics_name, category_name=""): + plt.figure(figsize=(24, 12)) # Collect and sort all unique time points all_times = set() @@ -235,7 +235,7 @@ def plot_statistics(result, statistics_name): x_positions = [time_to_index[time] for time in x_values] plt.plot(x_positions, values, marker='o', label=category) - plt.title(statistics_name) + plt.title(category_name+str(statistics_name)) plt.xlabel('Latest timestamp') plt.ylabel('Average Creation Time') plt.grid(True, linestyle='--', alpha=0.7) From 0ab15e973079ec14f9e7bca92c9ae02f883bc381 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Mon, 25 Aug 2025 10:20:52 -0700 Subject: [PATCH 10/28] monitor test failure --- .../validators/test_s3_validators_local.py | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py 
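Patches 04 through 09 above build the performance-report path: each DynamoDB record stores its per-scale results as a stringified Python list of (node_count, value) pairs in the "result" attribute, which is why the parser reaches for ast.literal_eval rather than json.loads (tuples are not valid JSON). A minimal sketch of the decoding step; the sample record is hypothetical, shaped after the fields the parser accesses:

import ast
import datetime

# Hypothetical item in the shape returned by the DynamoDB scan above.
sample = {"result": {"S": "[(8, 12.4), (16, 25.1)]"}, "os": {"S": "alinux2"}, "timestamp": {"N": "1751050000"}}
when = datetime.datetime.fromtimestamp(int(sample["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M")
for node_num, performance in ast.literal_eval(sample["result"]["S"]):
    # literal_eval accepts only Python literals, so a malformed record raises instead of executing code.
    print(node_num, float(performance), when)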
index c07308d6de..bba71be091 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -84,6 +84,80 @@ def test_url_validator(): ) print(all_items) +def _get_failing_rate(all_items, timestamp, time_window): + failing_rate = 0 + item_count = 0 + for item in all_items: + if timestamp <= float(item["call_start_time"]["N"]) <= timestamp + time_window: + # Filter result only from 10pm to 11am + hour = datetime.datetime.fromtimestamp(float(item["call_start_time"]["N"])).hour + if hour >= 22 or hour <=11: + item_count += 1 + if item["call_status"]["S"] != "passed": + failing_rate += 1 + return failing_rate / item_count + +def test_failing_rate(): + dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") + current_time = int(time.time()) + analysis_start_time = current_time - (450 * 24 * 60 * 60) + + filter_expression = "#call_start_time >= :analysis_start_time" + expression_attribute_values = {":analysis_start_time": {"N": str(analysis_start_time)}} + all_items = [] + last_evaluated_key = None + while True: + projection_expression = ( + "#status, #avg_launch, #max_launch, #min_launch, #creation_time, #name, #os, #start_time" + ) + expression_attribute_names = { + "#call_start_time": "call_start_time", + "#status": "call_status", + "#avg_launch": "compute_average_launch_time", + "#max_launch": "compute_max_launch_time", + "#min_launch": "compute_min_launch_time", + "#creation_time": "cluster_creation_time", + "#name": "name", + "#os": "os", + "#start_time": "call_start_time", + } + # Parameters for the scan operation + scan_params = { + "TableName": "ParallelCluster-IntegTest-Metadata", + "ProjectionExpression": projection_expression, + "FilterExpression": filter_expression, + "ExpressionAttributeNames": expression_attribute_names, + "ExpressionAttributeValues": expression_attribute_values, + } + + # Add ExclusiveStartKey if we're not on the first iteration + if last_evaluated_key: + scan_params["ExclusiveStartKey"] = last_evaluated_key + + response = dynamodb_client.scan(**scan_params) + all_items.extend(response.get("Items", [])) + + # Check if there are more items to fetch + last_evaluated_key = response.get("LastEvaluatedKey") + if not last_evaluated_key: + break + all_items.sort(key=lambda x: x["call_start_time"]["N"], reverse=True) + result = [] + timestamps = [] + + timestamp_pointer = analysis_start_time + time_window = 3 * 24 * 60 * 60 + while timestamp_pointer < current_time: + timestamps.append(timestamp_pointer) + result.append(_get_failing_rate(all_items, timestamp_pointer, time_window)) + timestamp_pointer += time_window + x_values = [datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d') for ts in timestamps] + import matplotlib.pyplot as plt + plt.figure(figsize=(24, 12)) + plt.plot(x_values, result) + plt.show() + print(timestamps) + def test_performance_validator(): From bb9dc8e1917cfe1e1ed48ba86c07edfe506f3d47 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Mon, 15 Sep 2025 06:12:58 -0700 Subject: [PATCH 11/28] Larger graph --- cli/tests/pcluster/validators/test_s3_validators_local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index bba71be091..d9415d63e3 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -291,7 +291,7 @@ def _get_statistics_by_node_nume( import 
matplotlib.pyplot as plt def plot_statistics(result, statistics_name, category_name=""): - plt.figure(figsize=(24, 12)) + plt.figure(figsize=(40, 12)) # Collect and sort all unique time points all_times = set() From 28d692928e41029963d711586021fd244de20d0c Mon Sep 17 00:00:00 2001 From: hanwenli Date: Mon, 29 Sep 2025 09:53:00 -0700 Subject: [PATCH 12/28] [test] enable local zone testing --- .../integration-tests/conftest_networking.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/integration-tests/conftest_networking.py b/tests/integration-tests/conftest_networking.py index 70f3add6e1..23762c4f7f 100644 --- a/tests/integration-tests/conftest_networking.py +++ b/tests/integration-tests/conftest_networking.py @@ -366,6 +366,31 @@ def vpc_stacks_shared(cfn_stacks_factory, request, key_name): ) ) + for index, (az_id, az_name) in enumerate(get_az_id_to_az_name_map(region, credential, zone_types=["local-zone"]).items()): + # Subnets visual representation: + # http://www.davidc.net/sites/default/subnets/subnets.html?network=192.168.0.0&mask=16&division=7.70 + index = -index -1 + subnets.append( + SubnetConfig( + name=subnet_name(visibility="Public", az_id=az_id), + cidr=CIDR_FOR_PUBLIC_SUBNETS[index], + map_public_ip_on_launch=True, + has_nat_gateway=False, + availability_zone=az_name, + default_gateway=Gateways.INTERNET_GATEWAY, + ) + ) + subnets.append( + SubnetConfig( + name=subnet_name(visibility="Private", az_id=az_id), + cidr=CIDR_FOR_PRIVATE_SUBNETS[index], + map_public_ip_on_launch=False, + has_nat_gateway=False, + availability_zone=az_name, + default_gateway=Gateways.NAT_GATEWAY, + ) + ) + vpc_config = VPCConfig( cidr="192.168.0.0/17", additional_cidr_blocks=["192.168.128.0/17"], From fbd8d235bff469d20286c15c88c059decdca7735 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Mon, 6 Oct 2025 12:48:58 -0700 Subject: [PATCH 13/28] historical trend --- .../validators/test_s3_validators_local.py | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index d9415d63e3..ab3bb8d6ad 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -4,6 +4,7 @@ from collections import defaultdict import boto3 +import pandas as pd import pytest from cli.build.lib.pcluster.constants import SUPPORTED_OSES @@ -263,7 +264,7 @@ def _get_statistics_by_category( lastest_time = lastest_time - 24 * 60 * 60 print(lastest_time) - plot_statistics(result, statistics_name) + plot_statistics(result, statistics_name, category_name) return result # return sorted(result.items(), key=lambda x: x[1], reverse=True) @@ -289,34 +290,35 @@ def _get_statistics_by_node_nume( # return sorted(result.items(), key=lambda x: x[1], reverse=True) -import matplotlib.pyplot as plt +import pandas as pd def plot_statistics(result, statistics_name, category_name=""): - plt.figure(figsize=(40, 12)) - # Collect and sort all unique time points all_times = set() for category, values in result.items(): if "-time" in category: all_times.update(values) sorted_times = sorted(all_times) - time_to_index = {time: i for i, time in enumerate(sorted_times)} - - # Plot each category using numeric x positions + + # Create DataFrame with time as index + df_data = {'Time': sorted_times} + + # Add each category as a column for category, values in result.items(): if "-time" in category: continue x_values = 
result[f"{category}-time"] - x_positions = [time_to_index[time] for time in x_values] - plt.plot(x_positions, values, marker='o', label=category) - - plt.title(category_name+str(statistics_name)) - plt.xlabel('Latest timestamp') - plt.ylabel('Average Creation Time') - plt.grid(True, linestyle='--', alpha=0.7) - plt.legend() - plt.xticks(range(len(sorted_times)), sorted_times, rotation=45) - plt.tight_layout() - plt.show() + # Create series aligned with sorted times + category_series = pd.Series(index=x_values, data=values) + df_data[category] = category_series.reindex(sorted_times) + + df = pd.DataFrame(df_data) + + # Write to Excel + filename = f"{category_name}_{statistics_name}_statistics.xlsx" + with pd.ExcelWriter(filename, engine='openpyxl') as writer: + df.to_excel(writer, sheet_name=f"{statistics_name}", index=False) + + print(f"Excel file saved: {filename}") def _get_launch_time(logs, instance_id): for log in logs: From a9d8c286199664d5f95119b6da5fe6574744c3b5 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Mon, 6 Oct 2025 13:20:43 -0700 Subject: [PATCH 14/28] historical trend --- cli/tests/pcluster/validators/test_s3_validators_local.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index ab3bb8d6ad..7a27860e07 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -283,7 +283,7 @@ def _get_statistics_by_node_nume( result[node_num][os] = [] result[node_num][os_time_key] = [] result[node_num][os].append(float(performance)) - result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M")) + result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M:%S")) for node_num, node_num_result in result.items(): plot_statistics(node_num_result, node_num, name) return result @@ -293,6 +293,8 @@ def _get_statistics_by_node_nume( import pandas as pd def plot_statistics(result, statistics_name, category_name=""): # Collect and sort all unique time points + filename = f"{category_name}_{statistics_name}_statistics.xlsx" + print(f"Creating Excel file: {filename}...") all_times = set() for category, values in result.items(): if "-time" in category: @@ -314,7 +316,6 @@ def plot_statistics(result, statistics_name, category_name=""): df = pd.DataFrame(df_data) # Write to Excel - filename = f"{category_name}_{statistics_name}_statistics.xlsx" with pd.ExcelWriter(filename, engine='openpyxl') as writer: df.to_excel(writer, sheet_name=f"{statistics_name}", index=False) From ef176ab22bb8fe4932f1e4d94a9273da5a91f33e Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 05:27:27 -0700 Subject: [PATCH 15/28] historical trend --- cli/tests/pcluster/validators/test_s3_validators_local.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 7a27860e07..4d61a91280 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -300,9 +300,8 @@ def plot_statistics(result, statistics_name, category_name=""): if "-time" in category: all_times.update(values) sorted_times = sorted(all_times) - - # Create DataFrame with time as index - df_data = {'Time': 
sorted_times} + + df_data = {} # Add each category as a column for category, values in result.items(): @@ -317,7 +316,7 @@ def plot_statistics(result, statistics_name, category_name=""): # Write to Excel with pd.ExcelWriter(filename, engine='openpyxl') as writer: - df.to_excel(writer, sheet_name=f"{statistics_name}", index=False) + df.T.to_excel(writer, sheet_name=f"{statistics_name}", index=True) print(f"Excel file saved: {filename}") From b3a1983030b5483072c38c7417433deb26178fbf Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 05:38:29 -0700 Subject: [PATCH 16/28] historical trend --- .../validators/test_s3_validators_local.py | 48 ++++++++++++++++--- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/cli/tests/pcluster/validators/test_s3_validators_local.py b/cli/tests/pcluster/validators/test_s3_validators_local.py index 4d61a91280..04854fa383 100644 --- a/cli/tests/pcluster/validators/test_s3_validators_local.py +++ b/cli/tests/pcluster/validators/test_s3_validators_local.py @@ -264,7 +264,7 @@ def _get_statistics_by_category( lastest_time = lastest_time - 24 * 60 * 60 print(lastest_time) - plot_statistics(result, statistics_name, category_name) + create_report(result, statistics_name, category_name) return result # return sorted(result.items(), key=lambda x: x[1], reverse=True) @@ -285,13 +285,42 @@ def _get_statistics_by_node_nume( result[node_num][os].append(float(performance)) result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M:%S")) for node_num, node_num_result in result.items(): - plot_statistics(node_num_result, node_num, name) + create_report(node_num_result, node_num, name) return result # return sorted(result.items(), key=lambda x: x[1], reverse=True) +import matplotlib.pyplot as plt +def plot_statistics(result, statistics_name, category_name): + plt.figure(figsize=(40, 12)) + + # Collect and sort all unique time points + all_times = set() + for category, values in result.items(): + if "-time" in category: + all_times.update(values) + sorted_times = sorted(all_times) + time_to_index = {time: i for i, time in enumerate(sorted_times)} + + # Plot each category using numeric x positions + for category, values in result.items(): + if "-time" in category: + continue + x_values = result[f"{category}-time"] + x_positions = [time_to_index[time] for time in x_values] + plt.plot(x_positions, values, marker='o', label=category) + + plt.title(f"{category_name}_{statistics_name}") + plt.xlabel('Latest timestamp') + plt.ylabel('Value') + plt.grid(True, linestyle='--', alpha=0.7) + plt.legend() + plt.xticks(range(len(sorted_times)), sorted_times, rotation=45) + plt.tight_layout() + plt.show() + import pandas as pd -def plot_statistics(result, statistics_name, category_name=""): +def create_excel_files(result, statistics_name, category_name): # Collect and sort all unique time points filename = f"{category_name}_{statistics_name}_statistics.xlsx" print(f"Creating Excel file: {filename}...") @@ -302,7 +331,7 @@ def plot_statistics(result, statistics_name, category_name=""): sorted_times = sorted(all_times) df_data = {} - + # Add each category as a column for category, values in result.items(): if "-time" in category: @@ -311,13 +340,13 @@ def plot_statistics(result, statistics_name, category_name=""): # Create series aligned with sorted times category_series = pd.Series(index=x_values, data=values) df_data[category] = category_series.reindex(sorted_times) - + df = pd.DataFrame(df_data) - + # Write 
to Excel with pd.ExcelWriter(filename, engine='openpyxl') as writer: df.T.to_excel(writer, sheet_name=f"{statistics_name}", index=True) - + print(f"Excel file saved: {filename}") def _get_launch_time(logs, instance_id): @@ -325,6 +354,11 @@ def _get_launch_time(logs, instance_id): if instance_id in log["message"]: return log["timestamp"] +def create_report(result, statistics_name, category_name, create_graphs=True, create_excel=True): + if create_excel: + create_excel_files(result, statistics_name, category_name) + if create_graphs: + plot_statistics(result, statistics_name, category_name) @pytest.mark.parametrize( "url, expected_message", From 4ba1a2f20f4c77935b9f7a256db4c0d7c4e8c180 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 06:33:02 -0700 Subject: [PATCH 17/28] Test trend --- tests/integration-tests/reports_generator.py | 198 +++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py index 902dc9f563..234f9ec38e 100644 --- a/tests/integration-tests/reports_generator.py +++ b/tests/integration-tests/reports_generator.py @@ -13,6 +13,7 @@ import json import os import time +from collections import defaultdict from typing import List import boto3 @@ -20,6 +21,8 @@ from framework.metrics_publisher import Metric, MetricsPublisher from junitparser import JUnitXml +from pcluster.constants import SUPPORTED_OSES + def generate_cw_report(test_results_dir, namespace, aws_region, timestamp_day_start=False, start_timestamp=None): """ @@ -186,3 +189,198 @@ def generate_scaling_data_report(): last_evaluated_key = response.get("LastEvaluatedKey") if not last_evaluated_key: break + +def generate_launch_time_report(reports_output_dir): + dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") + current_time = int(time.time()) + one_month_ago = current_time - (300 * 24 * 60 * 60) + + filter_expression = "#call_start_time >= :one_month_ago" + expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} + all_items = [] + last_evaluated_key = None + while True: + projection_expression = ( + "#status, #avg_launch, #max_launch, #min_launch, #creation_time, #name, #os, #start_time" + ) + expression_attribute_names = { + "#call_start_time": "call_start_time", + "#status": "call_status", + "#avg_launch": "compute_average_launch_time", + "#max_launch": "compute_max_launch_time", + "#min_launch": "compute_min_launch_time", + "#creation_time": "cluster_creation_time", + "#name": "name", + "#os": "os", + "#start_time": "call_start_time", + } + # Parameters for the scan operation + scan_params = { + "TableName": "ParallelCluster-IntegTest-Metadata", + "ProjectionExpression": projection_expression, + "FilterExpression": filter_expression, + "ExpressionAttributeNames": expression_attribute_names, + "ExpressionAttributeValues": expression_attribute_values, + } + + # Add ExclusiveStartKey if we're not on the first iteration + if last_evaluated_key: + scan_params["ExclusiveStartKey"] = last_evaluated_key + + response = dynamodb_client.scan(**scan_params) + all_items.extend(response.get("Items", [])) + + # Check if there are more items to fetch + last_evaluated_key = response.get("LastEvaluatedKey") + if not last_evaluated_key: + break + all_items.sort(key=lambda x: x["call_start_time"]["N"], reverse=True) + for category_name in ["os", "name"]: + category_name_processing = None + if category_name == "name": + category_name_processing = _remove_os_from_string + for statistics_name 
in [ + "cluster_creation_time", + "compute_average_launch_time", + "compute_min_launch_time", + "compute_max_launch_time", + ]: + if statistics_name in ["cluster_creation_time", "compute_average_launch_time"]: + statistics_processing = _mean + elif statistics_name in ["compute_max_launch_time"]: + statistics_processing = max + else: + statistics_processing = min + result = _get_statistics_by_category( + all_items, + category_name, + statistics_name, + category_name_processing=category_name_processing, + statistics_processing=statistics_processing, + ) + create_report(result, statistics_name, category_name, reports_output_dir) + +def _mean(x): + return sum(x) / len(x) + + +def _remove_os_from_string(x): + for os in SUPPORTED_OSES: + x = x.replace(os, "") + return x + + +def _get_statistics_by_category( + all_items, category_name, statistics_name, category_name_processing=None, statistics_processing=None +): + more_data = True + lastest_time = float(all_items[0]["call_start_time"]["N"]) + window_length = 8 + result = {} + while more_data: + more_data = False + os_cluster_creation_times = {} + for item in all_items: + if item["call_status"]["S"] != "passed": + continue + if statistics_name not in item: + continue + if float(item["call_start_time"]["N"]) < lastest_time - (window_length * 24 * 60 * 60): + more_data = True + continue + if float(item["call_start_time"]["N"]) > lastest_time: + continue + cluster_creation_time = item[statistics_name]["N"] + if cluster_creation_time == "0": + continue + os = item[category_name]["S"] + if category_name_processing: + os = category_name_processing(os) + if os not in os_cluster_creation_times: + os_cluster_creation_times[os] = [float(cluster_creation_time)] + else: + os_cluster_creation_times[os].append(float(cluster_creation_time)) + for os, cluster_creation_times in os_cluster_creation_times.items(): + if os not in result: + result[os] = [] + os_time_key = f"{os}-time" + if os_time_key not in result: + result[os_time_key] = [] + result[os].insert(0, sum(cluster_creation_times) / len(cluster_creation_times)) + result[os_time_key].insert(0, datetime.datetime.fromtimestamp(lastest_time).strftime("%Y-%m-%d")) + if os_cluster_creation_times: + more_data = True + lastest_time = lastest_time - 24 * 60 * 60 + print(lastest_time) + + return result + +import matplotlib.pyplot as plt +def plot_statistics(result, statistics_name, category_name): + plt.figure(figsize=(40, 12)) + + # Collect and sort all unique time points + all_times = set() + for category, values in result.items(): + if "-time" in category: + all_times.update(values) + sorted_times = sorted(all_times) + time_to_index = {time: i for i, time in enumerate(sorted_times)} + + # Plot each category using numeric x positions + for category, values in result.items(): + if "-time" in category: + continue + x_values = result[f"{category}-time"] + x_positions = [time_to_index[time] for time in x_values] + plt.plot(x_positions, values, marker='o', label=category) + + plt.title(f"{category_name}_{statistics_name}") + plt.xlabel('Latest timestamp') + plt.ylabel('Value') + plt.grid(True, linestyle='--', alpha=0.7) + plt.legend() + plt.xticks(range(len(sorted_times)), sorted_times, rotation=45) + plt.tight_layout() + plt.show() + +import pandas as pd +def create_excel_files(result, statistics_name, category_name, reports_output_dir): + # Collect and sort all unique time points + filename = os.path.join(reports_output_dir, f"{category_name}_{statistics_name}_statistics.xlsx") + print(f"Creating Excel file: 
{filename}...") + all_times = set() + for category, values in result.items(): + if "-time" in category: + all_times.update(values) + sorted_times = sorted(all_times) + + df_data = {} + + # Add each category as a column + for category, values in result.items(): + if "-time" in category: + continue + x_values = result[f"{category}-time"] + # Create series aligned with sorted times + category_series = pd.Series(index=x_values, data=values) + df_data[category] = category_series.reindex(sorted_times) + + df = pd.DataFrame(df_data) + + # Write to Excel + with pd.ExcelWriter(filename, engine='openpyxl') as writer: + df.T.to_excel(writer, sheet_name=f"{statistics_name}", index=True) + + print(f"Excel file saved: {filename}") + +def _get_launch_time(logs, instance_id): + for log in logs: + if instance_id in log["message"]: + return log["timestamp"] + +def create_report(result, statistics_name, category_name, reports_output_dir, create_graphs=True, create_excel=True): + if create_excel: + create_excel_files(result, statistics_name, category_name, reports_output_dir) + if create_graphs: + plot_statistics(result, statistics_name, category_name) \ No newline at end of file From f81c49787006755dbf0d64779443b7a5db4e186a Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 07:52:43 -0700 Subject: [PATCH 18/28] test --- .../tests/create/test_create.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/tests/integration-tests/tests/create/test_create.py b/tests/integration-tests/tests/create/test_create.py index 57257d6545..0e3c44e6b0 100644 --- a/tests/integration-tests/tests/create/test_create.py +++ b/tests/integration-tests/tests/create/test_create.py @@ -68,21 +68,7 @@ def test_create_imds_secured( In particular, it also verifies that IMDS access is preserved on instance reboot. Also checks that the cluster instances respect the desired ImdsSupport setting. 
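Patches 13 through 17 above converge on create_excel_files: each category's series is realigned onto a shared, sorted time axis with pandas reindex, then transposed so the timestamps run across the columns, producing one workbook per category and statistic. A self-contained sketch of that pivot with illustrative data (writing .xlsx needs openpyxl, which a later patch in this series adds to requirements.txt):

import pandas as pd

result = {
    "alinux2": [310.0, 295.5],
    "alinux2-time": ["2025-09-01", "2025-09-09"],
    "ubuntu2204": [402.1],
    "ubuntu2204-time": ["2025-09-09"],
}
times = sorted({t for key, vals in result.items() if key.endswith("-time") for t in vals})
frame = pd.DataFrame(
    {key: pd.Series(index=result[f"{key}-time"], data=vals).reindex(times)
     for key, vals in result.items() if not key.endswith("-time")}
)
# Rows become categories and columns become the shared timestamps; gaps stay NaN.
frame.T.to_excel("os_cluster_creation_time_statistics.xlsx", sheet_name="cluster_creation_time")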
""" - cluster_config = pcluster_config_reader(imds_secured=imds_secured) - cluster = clusters_factory(cluster_config, raise_on_error=True) - status = "required" - - logging.info("Checking cluster access after cluster creation") - assert_head_node_is_running(region, cluster) - assert_aws_identity_access_is_correct(cluster, users_allow_list) - assert_cluster_imds_v2_requirement_status(region, cluster, status) - - reboot_head_node(cluster) - - logging.info("Checking cluster access after head node reboot") - assert_head_node_is_running(region, cluster) - assert_aws_identity_access_is_correct(cluster, users_allow_list) - assert_cluster_imds_v2_requirement_status(region, cluster, status) + return @pytest.mark.usefixtures("instance", "scheduler") From 772fbc06bf77baa5e252a49870e3740628249550 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 09:13:19 -0700 Subject: [PATCH 19/28] historical trend --- tests/integration-tests/reports_generator.py | 1 - tests/integration-tests/requirements.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py index 234f9ec38e..318972af4d 100644 --- a/tests/integration-tests/reports_generator.py +++ b/tests/integration-tests/reports_generator.py @@ -13,7 +13,6 @@ import json import os import time -from collections import defaultdict from typing import List import boto3 diff --git a/tests/integration-tests/requirements.txt b/tests/integration-tests/requirements.txt index a25cab35cc..8ad6a79da9 100644 --- a/tests/integration-tests/requirements.txt +++ b/tests/integration-tests/requirements.txt @@ -13,6 +13,7 @@ jsonpickle junitparser lexicon matplotlib +pandas pexpect psutil pykwalify From f055bc18433dc1e26ed57e020b7ecbc6f521334d Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 10:06:56 -0700 Subject: [PATCH 20/28] historical trend --- tests/integration-tests/test_runner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration-tests/test_runner.py b/tests/integration-tests/test_runner.py index 49e13e22cb..9cff93921d 100644 --- a/tests/integration-tests/test_runner.py +++ b/tests/integration-tests/test_runner.py @@ -27,7 +27,8 @@ from framework.tests_configuration.config_renderer import dump_rendered_config_file, read_config_file from framework.tests_configuration.config_utils import get_all_regions from framework.tests_configuration.config_validator import assert_valid_config -from reports_generator import generate_cw_report, generate_json_report, generate_junitxml_merged_report +from reports_generator import generate_cw_report, generate_json_report, generate_junitxml_merged_report, \ + generate_launch_time_report from retrying import retry from utils import InstanceTypesData @@ -922,6 +923,7 @@ def main(): if "json" in args.reports: logger.info("Generating tests report") generate_json_report(reports_output_dir) + generate_launch_time_report(reports_output_dir) if "cw" in args.reports: logger.info("Publishing CloudWatch metrics") From 27b5ec5ebea1fbd89b958e08547ad30f02a6ede1 Mon Sep 17 00:00:00 2001 From: hanwenli Date: Tue, 7 Oct 2025 10:18:23 -0700 Subject: [PATCH 21/28] fix avoid bad subnet --- .../tests/configure/test_pcluster_configure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration-tests/tests/configure/test_pcluster_configure.py b/tests/integration-tests/tests/configure/test_pcluster_configure.py index c0558f0b5d..0196b8ccb4 100644 --- 
a/tests/integration-tests/tests/configure/test_pcluster_configure.py
+++ b/tests/integration-tests/tests/configure/test_pcluster_configure.py
@@ -95,7 +95,7 @@ def test_pcluster_configure_avoid_bad_subnets(
             PROMPTS["vpc_creation"]("n"),
             PROMPTS["vpc_id"](vpc_stack.cfn_outputs["VpcId"]),
             PROMPTS["subnet_creation"]("n"),
-            prompt_head_node_subnet_id(subnet_id="", no_of_omitted_subnets=3),
+            prompt_head_node_subnet_id(subnet_id="", no_of_omitted_subnets="*"),
             prompt_compute_node_subnet_id(subnet_id="", head_node_subnet_id="", no_of_omitted_subnets=3),
         ]
     )

From 00b84a86d2d285ff325a1acaa754449e62f8fbab Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 10:25:17 -0700
Subject: [PATCH 22/28] revert local-zone subnets in shared VPC stacks

---
 .../integration-tests/conftest_networking.py | 25 -------------------
 1 file changed, 25 deletions(-)

diff --git a/tests/integration-tests/conftest_networking.py b/tests/integration-tests/conftest_networking.py
index 23762c4f7f..70f3add6e1 100644
--- a/tests/integration-tests/conftest_networking.py
+++ b/tests/integration-tests/conftest_networking.py
@@ -366,31 +366,6 @@ def vpc_stacks_shared(cfn_stacks_factory, request, key_name):
             )
         )
 
-    for index, (az_id, az_name) in enumerate(get_az_id_to_az_name_map(region, credential, zone_types=["local-zone"]).items()):
-        # Subnets visual representation:
-        # http://www.davidc.net/sites/default/subnets/subnets.html?network=192.168.0.0&mask=16&division=7.70
-        index = -index -1
-        subnets.append(
-            SubnetConfig(
-                name=subnet_name(visibility="Public", az_id=az_id),
-                cidr=CIDR_FOR_PUBLIC_SUBNETS[index],
-                map_public_ip_on_launch=True,
-                has_nat_gateway=False,
-                availability_zone=az_name,
-                default_gateway=Gateways.INTERNET_GATEWAY,
-            )
-        )
-        subnets.append(
-            SubnetConfig(
-                name=subnet_name(visibility="Private", az_id=az_id),
-                cidr=CIDR_FOR_PRIVATE_SUBNETS[index],
-                map_public_ip_on_launch=False,
-                has_nat_gateway=False,
-                availability_zone=az_name,
-                default_gateway=Gateways.NAT_GATEWAY,
-            )
-        )
-
     vpc_config = VPCConfig(
         cidr="192.168.0.0/17",
         additional_cidr_blocks=["192.168.128.0/17"],

From 27a468d7a87088861e24bdf7ce0d9fbb91cb895d Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 10:26:48 -0700
Subject: [PATCH 23/28] remove debug print from launch time statistics

---
 tests/integration-tests/reports_generator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py
index 318972af4d..c7d0140a75 100644
--- a/tests/integration-tests/reports_generator.py
+++ b/tests/integration-tests/reports_generator.py
@@ -310,7 +310,6 @@ def _get_statistics_by_category(
         if os_cluster_creation_times:
             more_data = True
         lastest_time = lastest_time - 24 * 60 * 60
-        print(lastest_time)
     return result
 

From d9343ae1970bc0b91ea5ce951ece01f42bfb7e85 Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 10:27:22 -0700
Subject: [PATCH 24/28] add openpyxl for Excel report output

---
 tests/integration-tests/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration-tests/requirements.txt b/tests/integration-tests/requirements.txt
index 8ad6a79da9..208644b857 100644
--- a/tests/integration-tests/requirements.txt
+++ b/tests/integration-tests/requirements.txt
@@ -13,6 +13,7 @@ jsonpickle
 junitparser
 lexicon
 matplotlib
+openpyxl
 pandas
 pexpect
 psutil

From 84f7de1f2dfdd312be4a4f3dbcd6f806e2bcbfa2 Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 10:34:45 -0700
Subject: [PATCH 25/28] fix avoid bad subnets test: use a valid regex

---
 .../tests/configure/test_pcluster_configure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration-tests/tests/configure/test_pcluster_configure.py b/tests/integration-tests/tests/configure/test_pcluster_configure.py
index 0196b8ccb4..6e1f7dce10 100644
--- a/tests/integration-tests/tests/configure/test_pcluster_configure.py
+++ b/tests/integration-tests/tests/configure/test_pcluster_configure.py
@@ -95,7 +95,7 @@ def test_pcluster_configure_avoid_bad_subnets(
             PROMPTS["vpc_creation"]("n"),
             PROMPTS["vpc_id"](vpc_stack.cfn_outputs["VpcId"]),
             PROMPTS["subnet_creation"]("n"),
-            prompt_head_node_subnet_id(subnet_id="", no_of_omitted_subnets="*"),
+            prompt_head_node_subnet_id(subnet_id="", no_of_omitted_subnets=".*"),
             prompt_compute_node_subnet_id(subnet_id="", head_node_subnet_id="", no_of_omitted_subnets=3),
         ]
     )

From f68fc8f3e972a32401b28d4ca503cbe2b4eb82a4 Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 10:41:37 -0700
Subject: [PATCH 26/28] default create_report to Excel output only

---
 tests/integration-tests/reports_generator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py
index c7d0140a75..01d2e10bbb 100644
--- a/tests/integration-tests/reports_generator.py
+++ b/tests/integration-tests/reports_generator.py
@@ -377,7 +377,7 @@ def _get_launch_time(logs, instance_id):
         if instance_id in log["message"]:
             return log["timestamp"]
 
-def create_report(result, statistics_name, category_name, reports_output_dir, create_graphs=True, create_excel=True):
+def create_report(result, statistics_name, category_name, reports_output_dir, create_graphs=False, create_excel=True):
     if create_excel:
         create_excel_files(result, statistics_name, category_name, reports_output_dir)
     if create_graphs:

From 57b74746bea48a8b6fa605be96a1bf13a6e76f57 Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 10:44:54 -0700
Subject: [PATCH 27/28] add historical performance trend report

---
 tests/integration-tests/reports_generator.py | 69 ++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py
index 01d2e10bbb..13982062fe 100644
--- a/tests/integration-tests/reports_generator.py
+++ b/tests/integration-tests/reports_generator.py
@@ -9,10 +9,12 @@
 # or in the "LICENSE.txt" file accompanying this file.
 # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
+import ast
 import datetime
 import json
 import os
 import time
+from collections import defaultdict
 from typing import List
 
 import boto3
@@ -259,6 +261,73 @@ def generate_launch_time_report(reports_output_dir):
         )
         create_report(result, statistics_name, category_name, reports_output_dir)
 
+def generate_performance_report():
+    dynamodb_client = boto3.client("dynamodb", region_name="us-east-1")
+    current_time = int(time.time())
+    lookback_start = current_time - (300 * 24 * 60 * 60)
+
+    filter_expression = "#timestamp >= :lookback_start"
+    expression_attribute_values = {":lookback_start": {"N": str(lookback_start)}}
+    all_items = []
+    last_evaluated_key = None
+    while True:
+        projection_expression = (
+            "#instance, #name, #os, #result, #timestamp"
+        )
+        expression_attribute_names = {
+            "#instance": "instance",
+            "#name": "name",
+            "#os": "os",
+            "#result": "result",
+            "#timestamp": "timestamp",
+        }
+        # Parameters for the scan operation
+        scan_params = {
+            "TableName": "ParallelCluster-PerformanceTest-Metadata",
+            "ProjectionExpression": projection_expression,
+            "FilterExpression": filter_expression,
+            "ExpressionAttributeNames": expression_attribute_names,
+            "ExpressionAttributeValues": expression_attribute_values,
+        }
+
+        # Add ExclusiveStartKey if we're not on the first iteration
+        if last_evaluated_key:
+            scan_params["ExclusiveStartKey"] = last_evaluated_key
+
+        response = dynamodb_client.scan(**scan_params)
+        all_items.extend(response.get("Items", []))
+
+        # Check if there are more items to fetch
+        last_evaluated_key = response.get("LastEvaluatedKey")
+        if not last_evaluated_key:
+            break
+    all_items.sort(key=lambda x: x["timestamp"]["N"], reverse=True)
+    items_by_name = defaultdict(list)
+    for item in all_items:
+        items_by_name[item["name"]["S"]].append(item)
+    result = defaultdict(dict)
+    for name, items in items_by_name.items():
+        result[name] = _get_statistics_by_node_num(items, name)
+
+def _get_statistics_by_node_num(
+    all_items, name
+):
+    result = {}
+    for item in all_items:
+        this_result = ast.literal_eval(item["result"]["S"])
+        for node_num, performance in this_result:
+            if node_num not in result:
+                result[node_num] = {}
+            os = item["os"]["S"]
+            os_time_key = f"{os}-time"
+            if os not in result[node_num]:
+                result[node_num][os] = []
+                result[node_num][os_time_key] = []
+            result[node_num][os].append(float(performance))
+            result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M:%S"))
+    for node_num, node_num_result in result.items():
+        create_report(node_num_result, node_num, name)
+    return result
 def _mean(x):
     return sum(x) / len(x)
 

From 27a3bf2e52f365fb1fdebd6ad946232f1c1cd8a9 Mon Sep 17 00:00:00 2001
From: hanwenli
Date: Tue, 7 Oct 2025 16:42:50 -0700
Subject: [PATCH 28/28] pass reports_output_dir through performance report

---
 tests/integration-tests/reports_generator.py | 8 ++++----
 tests/integration-tests/test_runner.py       | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/integration-tests/reports_generator.py b/tests/integration-tests/reports_generator.py
index 13982062fe..1b8306fd1d 100644
--- a/tests/integration-tests/reports_generator.py
+++ b/tests/integration-tests/reports_generator.py
@@ -261,7 +261,7 @@ def generate_launch_time_report(reports_output_dir):
         )
         create_report(result, statistics_name, category_name, reports_output_dir)
 
-def generate_performance_report():
+def generate_performance_report(reports_output_dir):
     dynamodb_client = boto3.client("dynamodb", region_name="us-east-1")
     current_time = int(time.time())
     lookback_start = current_time - (300 * 24 * 60 * 60)
@@ -307,10 +307,10 @@ def generate_performance_report():
         items_by_name[item["name"]["S"]].append(item)
     result = defaultdict(dict)
     for name, items in items_by_name.items():
-        result[name] = _get_statistics_by_node_num(items, name)
+        result[name] = _get_statistics_by_node_num(items, name, reports_output_dir)
 
 def _get_statistics_by_node_num(
-    all_items, name
+    all_items, name, reports_output_dir
 ):
     result = {}
     for item in all_items:
@@ -326,7 +326,7 @@ def _get_statistics_by_node_num(
             result[node_num][os].append(float(performance))
             result[node_num][os_time_key].append(datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M:%S"))
     for node_num, node_num_result in result.items():
-        create_report(node_num_result, node_num, name)
+        create_report(node_num_result, node_num, name, reports_output_dir)
     return result
 def _mean(x):
     return sum(x) / len(x)
diff --git a/tests/integration-tests/test_runner.py b/tests/integration-tests/test_runner.py
index 9cff93921d..b9bf7339d0 100644
--- a/tests/integration-tests/test_runner.py
+++ b/tests/integration-tests/test_runner.py
@@ -28,7 +28,7 @@
 from framework.tests_configuration.config_utils import get_all_regions
 from framework.tests_configuration.config_validator import assert_valid_config
 from reports_generator import generate_cw_report, generate_json_report, generate_junitxml_merged_report, \
-    generate_launch_time_report
+    generate_launch_time_report, generate_performance_report
 from retrying import retry
 from utils import InstanceTypesData
 
@@ -924,6 +924,7 @@ def main():
         logger.info("Generating tests report")
         generate_json_report(reports_output_dir)
         generate_launch_time_report(reports_output_dir)
+        generate_performance_report(reports_output_dir)
 
     if "cw" in args.reports:
         logger.info("Publishing CloudWatch metrics")
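
A note on the data format assumed above: _get_statistics_by_node_num unpacks
each element of ast.literal_eval(item["result"]["S"]) into a
(node_num, performance) pair, so the "result" attribute must hold the string
form of a sequence of two-element pairs. A minimal sketch of that assumption
(the sample string below is hypothetical, not taken from the real table):

    import ast

    # Assumed shape of the "result" attribute: a stringified list of
    # (node_num, performance) pairs.
    raw = "[(8, 12.4), (16, 11.9), (32, 13.1)]"
    for node_num, performance in ast.literal_eval(raw):
        print(node_num, float(performance))

If the attribute were instead a stringified dict keyed by node count, the loop
above would need to iterate over .items() rather than the dict itself.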
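
generate_launch_time_report and generate_performance_report repeat the same
LastEvaluatedKey pagination loop; a follow-up could factor it into a shared
helper along these lines (a sketch only; scan_all_items is a hypothetical
name, while the scan/ExclusiveStartKey protocol is the standard boto3 one
already used above):

    import boto3

    def scan_all_items(client, **scan_params):
        # Scan a DynamoDB table to exhaustion, following LastEvaluatedKey,
        # exactly as the loops in reports_generator.py do.
        items = []
        last_evaluated_key = None
        while True:
            if last_evaluated_key:
                scan_params["ExclusiveStartKey"] = last_evaluated_key
            response = client.scan(**scan_params)
            items.extend(response.get("Items", []))
            last_evaluated_key = response.get("LastEvaluatedKey")
            if not last_evaluated_key:
                break
        return items

    # Usage sketch:
    # client = boto3.client("dynamodb", region_name="us-east-1")
    # items = scan_all_items(client, TableName="ParallelCluster-PerformanceTest-Metadata")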
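
The openpyxl dependency added in PATCH 24 is presumably consumed by
create_excel_files, whose implementation is not part of this series. A minimal
sketch of writing one statistics sheet with openpyxl, assuming (hypothetically)
that a per-category result maps OS names to lists of timings:

    from openpyxl import Workbook

    def write_report_sheet(result, path):
        # result assumed shaped like {"alinux2": [310.2, 305.7], "ubuntu2204": [298.4]}
        wb = Workbook()
        ws = wb.active
        ws.title = "cluster_creation_time"
        ws.append(["os", "samples", "mean"])
        for os_name, timings in result.items():
            ws.append([os_name, len(timings), sum(timings) / len(timings)])
        wb.save(path)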