Skip to content

Commit 50d4749

Browse files
committed
improve simulator
1 parent 24e5968 commit 50d4749

File tree

9 files changed

+3044
-5013
lines changed

9 files changed

+3044
-5013
lines changed

.github/workflows/cd.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@ on:
1111
push:
1212
branches:
1313
- main
14-
- feature/** # allow testing
15-
- dev
1614

1715
workflow_dispatch:
1816

.github/workflows/ci.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ on:
44
push:
55
branches:
66
- main
7-
# - 'feature/*'
7+
- feature/** # also run CI on pushes to feature branches, to allow testing
8+
89
pull_request:
910
branches:
1011
- main

analyze_simulator_logs.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import matplotlib.dates as mdates
44
from datetime import datetime
55
import numpy as np
6+
67
# -----------------------------
78
# Config
89
# -----------------------------
@@ -15,8 +16,9 @@
1516
latencies = []
1617
results = []
1718

19+
# Regex: capture timestamp, result, and latency; skip any extra fields (e.g. the "Q:" quantization flag) logged between Result and Latency
1820
pattern = re.compile(
19-
r"\[(.*?)\].*?\|\s*Result:\s*(\w+)\s*\|\s*Latency:\s*([\d\.]+)ms"
21+
r"\[(.*?)\].*?\|\s*Result:\s*(\w+).*?\|\s*Latency:\s*([\d\.]+)ms"
2022
)
2123

2224
with open(LOG_FILE, "r") as f:
@@ -30,6 +32,10 @@
3032

3133
print(f"Parsed {len(latencies)} requests from log.")
3234

35+
if not latencies:
36+
print("No data found. Check log file path or format.")
37+
exit()
38+
3339
# -----------------------------
3440
# Plot latency over time (with moving average)
3541
# -----------------------------
@@ -65,7 +71,7 @@
6571
print("\n=== Summary ===")
6672
print(f"Min latency: {min(latencies):.2f} ms")
6773
print(f"Avg latency: {sum(latencies)/len(latencies):.2f} ms")
68-
print(f"P95 latency: {sorted(latencies)[int(len(latencies)*0.95)]:.2f} ms")
74+
print(f"P95 latency: {np.percentile(latencies, 95):.2f} ms")
6975
print(f"Max latency: {max(latencies):.2f} ms")
7076

7177
pos = results.count("positive")

load_simulator.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@
1111
# Config
1212
# -----------------------------
1313
api_url = "http://52.13.56.115:8000/predict"
14-
concurrent_users = 100
15-
total_requests = 5000
1614

1715
LOG_DIR = "logs"
1816
os.makedirs(LOG_DIR, exist_ok=True)
@@ -69,18 +67,19 @@
6967
# -----------------------------
7068
# Async request
7169
# -----------------------------
72-
async def send_request(session, text):
70+
async def send_request(session, text,quantize):
7371
start = time.perf_counter()
7472
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
73+
payload={"text": text,'quantize':quantize}
7574

7675
try:
77-
async with session.get(api_url, params={"text": text}) as response:
76+
async with session.post(api_url, json=payload) as response:
7877
result = await response.json()
7978
latency = (time.perf_counter() - start) * 1000 # ms
8079

8180
# log only to file
8281
logger.info(
83-
f"[{timestamp}] Text: {text[:30]:<30} | Result: {result.get('sentiment', 'N/A'):<8} | Latency: {latency:.2f}ms"
82+
f"[{timestamp}] Text: {text[:30]:<30} | Result: {result.get('sentiment', 'N/A'):<8} | Q:{result.get('quantized')} | Latency: {latency:.2f}ms"
8483
)
8584
except Exception as e:
8685
logger.error(f"Error: {e}")
@@ -89,13 +88,13 @@ async def send_request(session, text):
8988
# -----------------------------
9089
# Main function with progress bar
9190
# -----------------------------
92-
async def main():
91+
async def main(concurent_users, total_requests,quantize):
9392
async with aiohttp.ClientSession() as session:
9493
tasks = []
9594
with tqdm(total=total_requests, desc="Simulating load", ncols=80) as pbar:
9695
for i in range(total_requests):
9796
text = random.choice(sample_texts)
98-
task = asyncio.create_task(send_request(session, text))
97+
task = asyncio.create_task(send_request(session, text,quantize))
9998
tasks.append(task)
10099

101100
if len(tasks) >= concurrent_users:
@@ -109,4 +108,15 @@ async def main():
109108

110109

111110
if __name__ == "__main__":
112-
asyncio.run(main())
111+
import argparse
112+
parser = argparse.ArgumentParser(description="Load Simulator for Sentiment API")
113+
parser.add_argument('--concurrent', type=int, default=1, help='Number of concurrent users')
114+
parser.add_argument('--requests', type=int, default=1, help='Total number of requests to send')
115+
parser.add_argument('--quantize',type=str,default="true", help="Whether to use quantized model or fp32 model")
116+
args=parser.parse_args()
117+
concurrent_users = args.concurrent
118+
total_requests = args.requests
119+
quantize = args.quantize == "true"
120+
121+
asyncio.run(main(concurrent_users, total_requests,quantize))
122+

logs/Figure_1.png

-169 KB
Binary file not shown.

logs/latency-plot-with-no-q.png

138 KB
Loading

logs/latency-plot-with-q.png

106 KB
Loading

logs/latency-stats.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
With quantization
2+
=== Summary ===
3+
Min latency: 291.97 ms
4+
Avg latency: 1302.57 ms
5+
P95 latency: 2581.50 ms
6+
Max latency: 4830.67 ms
7+
Positive: 1732, Negative: 1268
8+
9+
Without quantization
10+
11+
=== Summary ===
12+
Min latency: 331.52 ms
13+
Avg latency: 3274.15 ms
14+
P95 latency: 5912.65 ms
15+
Max latency: 9143.29 ms
16+
Positive: 1584, Negative: 1198

0 commit comments

Comments
 (0)