Skip to content

Commit d1191d2

Browse files
committed
(benchmark): Add a script to analyse the log data
1 parent f656e03 commit d1191d2

4 files changed

Lines changed: 86 additions & 7 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
benchmark/data/*
2+
benchmark/scripts/plots/*
23
__pycache__/*
34
.DS_Store

TODO.md

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,10 @@
33
## Lambdas
44

55
- Python
6-
- Ruby
76
- Java
87

98
- Use futures / async to write to DynamoDB concurrently in the Rust lambda
109

11-
## Infrastucture
12-
13-
- Deploy the other lambdas
14-
1510
## Benchmark
1611

17-
- Initial analysis - see what's there
18-
- Script for reproducible analyses
12+
- Redo analysis with all the Lambda data
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import seaborn as sns
2+
import matplotlib.pyplot as plt
3+
import pandas as pd
4+
import numpy as np
5+
6+
# Configure seaborn once at import time: "whitegrid" style with the
# "colorblind" palette.
sns.set_theme(style="whitegrid", palette="colorblind")
7+
8+
9+
def save_plot(ax, plot_name: str) -> None:
    """Save the figure owning *ax* to ``plots/<plot_name>.png`` and close it.

    Args:
        ax: Object exposing ``get_figure()``, e.g. the value returned by
            ``sns.histplot``.
        plot_name: File name stem (without extension) of the PNG to write.
    """
    # Function-scope import keeps this block self-contained.
    from pathlib import Path

    # The output directory is resolved relative to the current working
    # directory. ``savefig`` does not create missing directories, so make
    # sure it exists before writing.
    output_dir = Path("plots")
    output_dir.mkdir(parents=True, exist_ok=True)

    figure = ax.get_figure()
    figure.savefig(output_dir / f"{plot_name}.png")
    # Close explicitly so the next plot starts from a fresh figure.
    plt.close(figure)
13+
14+
15+
def print_and_plot_init_duration_by_language(data: pd.DataFrame) -> None:
    """Print cold-start init-duration percentiles and plot their distribution.

    Only rows whose ``cold_start`` column equals ``True`` are considered,
    both for the printed p50/p95/p99 table (grouped by ``name``) and for the
    histogram, which is written via ``save_plot`` under the name
    ``init_duration_by_language``.

    Args:
        data: Frame with at least the columns ``name``, ``cold_start`` and
            ``init_duration_ms``.
    """
    # Element-wise comparison on a Series; ``is True`` would not work here.
    cold_starts = data[data["cold_start"] == True]  # noqa: E712

    print("Cold start times per language - p50, p95, p99")
    print(
        cold_starts.groupby("name")["init_duration_ms"]
        .quantile(q=np.array([0.50, 0.95, 0.99]))
        .unstack()
    )
    print("-" * 10)

    # Plot the filtered rows so the histogram matches the printed table and
    # the "Cold start" title; the unfiltered frame would also include
    # non-cold-start invocations.
    ax = sns.histplot(
        x="init_duration_ms", hue="name", binwidth=20, data=cold_starts
    )
    ax.set_title("Cold start times by language")
    ax.set_xlabel("Init duration (ms)")
    ax.set_ylabel("Count")
    ax.set_xlim(0)
    sns.despine()
    save_plot(ax, "init_duration_by_language")
33+
34+
35+
def print_and_plot_run_time_by_language(data: pd.DataFrame) -> None:
    """Report execution-time percentiles and save a histogram of them.

    Prints the p50/p95/p99 of ``execution_time_ms`` for every
    (``name``, ``cold_start``) pair, then draws a histogram of
    ``execution_time_ms`` coloured by ``name`` and hands it to ``save_plot``
    as ``run_time_by_language``.

    Args:
        data: Frame with at least the columns ``name``, ``cold_start`` and
            ``execution_time_ms``.
    """
    print("Execution times per language - p50, p95, p99")
    percentiles = np.array([0.50, 0.95, 0.99])
    times_by_group = data.groupby(["name", "cold_start"])["execution_time_ms"]
    percentile_table = times_by_group.quantile(q=percentiles).unstack()
    print(percentile_table)
    print("-" * 10)

    histogram = sns.histplot(
        data=data,
        x="execution_time_ms",
        hue="name",
        binwidth=5,
    )
    # The x axis is clipped to the 0-500 range.
    histogram.set(
        title="Execution times by language",
        xlabel="Execution time (ms)",
        ylabel="Count",
        xlim=(0, 500),
    )
    sns.despine()
    save_plot(histogram, "run_time_by_language")
51+
52+
53+
def print_total_cost_by_language(data: pd.DataFrame) -> None:
    """Print summed init, execution and combined durations per language.

    For each ``name`` the table shows the summed ``init_duration_ms``, the
    summed ``execution_time_ms``, their sum (``total_time``) and the fraction
    of that total spent in init (``init_percentage``, a ratio rather than a
    value out of 100).

    The input frame is left unmodified.

    Args:
        data: Frame with at least the columns ``name``, ``init_duration_ms``
            and ``execution_time_ms``.
    """
    # From August 1st 2025 AWS will charge for the INIT phase
    # https://aws.amazon.com/blogs/compute/aws-lambda-standardizes-billing-for-init-phase/
    #
    # Sum each column per language first and add the totals afterwards.
    # Adding the columns row by row would yield NaN for every row that lacks
    # an init duration, and the grouped sum would then drop that row's
    # execution time from the total. It would also require writing a new
    # column into the caller's frame.
    totals = data.groupby("name")[["init_duration_ms", "execution_time_ms"]].sum()
    totals["total_time"] = totals["init_duration_ms"] + totals["execution_time_ms"]
    totals["init_percentage"] = totals["init_duration_ms"] / totals["total_time"]

    print("Total billed time by language")
    print(totals)
    print("-" * 10)
65+
66+
67+
if __name__ == "__main__":
    # The CSV path is relative to the current working directory.
    data = pd.read_csv("../data/parsed_cloudwatch_logs.csv")

    # Run each report in turn against the same frame.
    reports = (
        print_and_plot_init_duration_by_language,
        print_and_plot_run_time_by_language,
        print_total_cost_by_language,
    )
    for report in reports:
        report(data=data)

benchmark/scripts/requirements.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,20 @@
11
boto3==1.38.6
22
botocore==1.38.6
3+
contourpy==1.3.2
4+
cycler==0.12.1
5+
fonttools==4.58.0
36
jmespath==1.0.1
7+
kiwisolver==1.4.8
8+
matplotlib==3.10.3
9+
numpy==2.2.5
10+
packaging==25.0
11+
pandas==2.2.3
12+
pillow==11.2.1
13+
pyparsing==3.2.3
414
python-dateutil==2.9.0.post0
15+
pytz==2025.2
516
s3transfer==0.12.0
17+
seaborn==0.13.2
618
six==1.17.0
19+
tzdata==2025.2
720
urllib3==2.4.0

0 commit comments

Comments
 (0)