sfarrukhm
diff --git a/‎.github/workflows/cd.yml‎
Lines changed: 0 additions & 2 deletions b/‎.github/workflows/cd.yml‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 2 additions & 1 deletion b/‎.github/workflows/ci.yml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎analyze_simulator_logs.py‎
Lines changed: 8 additions & 2 deletions b/‎analyze_simulator_logs.py‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎load_simulator.py‎
Lines changed: 18 additions & 8 deletions b/‎load_simulator.py‎
Lines changed: 18 additions & 8 deletions
diff --git a/‎logs/Figure_1.png‎
-169 KB b/‎logs/Figure_1.png‎
-169 KB
diff --git a/‎logs/latency-plot-with-no-q.png‎
138 KB b/‎logs/latency-plot-with-no-q.png‎
138 KB
diff --git a/‎logs/latency-plot-with-q.png‎
106 KB b/‎logs/latency-plot-with-q.png‎
106 KB
diff --git a/‎logs/latency-stats.txt‎
Lines changed: 16 additions & 0 deletions b/‎logs/latency-stats.txt‎
Lines changed: 16 additions & 0 deletions
@@ -11,8 +11,6 @@ on:
   push:
     branches:
       - main
-      - feature/**  # allow testing
-      - dev
 
   workflow_dispatch:
 
 
@@ -4,7 +4,8 @@ on:
   push:
     branches:
       - main
-      # - 'feature/*'
+      - feature/**  # allow testing
+      
   pull_request:
     branches:
       - main
 
@@ -3,6 +3,7 @@
 import matplotlib.dates as mdates
 from datetime import datetime
 import numpy as np
+
 # -----------------------------
 # Config
 # -----------------------------
@@ -15,8 +16,9 @@
 latencies = []
 results = []
 
+# Regex: capture timestamp, result, and latency; skip any extra fields (e.g. "Q:...") between Result and Latency
 pattern = re.compile(
-    r"\[(.*?)\].*?\|\s*Result:\s*(\w+)\s*\|\s*Latency:\s*([\d\.]+)ms"
+    r"\[(.*?)\].*?\|\s*Result:\s*(\w+).*?\|\s*Latency:\s*([\d\.]+)ms"
 )
 
 with open(LOG_FILE, "r") as f:
@@ -30,6 +32,10 @@
 
 print(f"Parsed {len(latencies)} requests from log.")
 
+if not latencies:
+    print("No data found. Check log file path or format.")
+    exit()
+
 # -----------------------------
 # Plot latency over time (with moving average)
 # -----------------------------
@@ -65,7 +71,7 @@
     print("\n=== Summary ===")
     print(f"Min latency: {min(latencies):.2f} ms")
     print(f"Avg latency: {sum(latencies)/len(latencies):.2f} ms")
-    print(f"P95 latency: {sorted(latencies)[int(len(latencies)*0.95)]:.2f} ms")
+    print(f"P95 latency: {np.percentile(latencies, 95):.2f} ms")
     print(f"Max latency: {max(latencies):.2f} ms")
 
     pos = results.count("positive")
 
@@ -11,8 +11,6 @@
 # Config
 # -----------------------------
 api_url = "http://52.13.56.115:8000/predict"
-concurrent_users = 100
-total_requests = 5000
 
 LOG_DIR = "logs"
 os.makedirs(LOG_DIR, exist_ok=True)
@@ -69,18 +67,19 @@
 # -----------------------------
 # Async request
 # -----------------------------
-async def send_request(session, text):
+async def send_request(session, text,quantize):
     start = time.perf_counter()
     timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
+    payload={"text": text,'quantize':quantize}
 
     try:
-        async with session.get(api_url, params={"text": text}) as response:
+        async with session.post(api_url, json=payload) as response:
             result = await response.json()
             latency = (time.perf_counter() - start) * 1000  # ms
 
             # log only to file
             logger.info(
-                f"[{timestamp}] Text: {text[:30]:<30} | Result: {result.get('sentiment', 'N/A'):<8} | Latency: {latency:.2f}ms"
+                f"[{timestamp}] Text: {text[:30]:<30} | Result: {result.get('sentiment', 'N/A'):<8} | Q:{result.get('quantized')} | Latency: {latency:.2f}ms"
             )
     except Exception as e:
         logger.error(f"Error: {e}")
@@ -89,13 +88,13 @@ async def send_request(session, text):
 # -----------------------------
 # Main function with progress bar
 # -----------------------------
-async def main():
+async def main(concurrent_users, total_requests,quantize):
     async with aiohttp.ClientSession() as session:
         tasks = []
         with tqdm(total=total_requests, desc="Simulating load", ncols=80) as pbar:
             for i in range(total_requests):
                 text = random.choice(sample_texts)
-                task = asyncio.create_task(send_request(session, text))
+                task = asyncio.create_task(send_request(session, text,quantize))
                 tasks.append(task)
 
                 if len(tasks) >= concurrent_users:
@@ -109,4 +108,15 @@ async def main():
 
 
 if __name__ == "__main__":
-    asyncio.run(main())
+    import argparse
+    parser = argparse.ArgumentParser(description="Load Simulator for Sentiment API")
+    parser.add_argument('--concurrent', type=int, default=1, help='Number of concurrent users')
+    parser.add_argument('--requests', type=int, default=1, help='Total number of requests to send')
+    parser.add_argument('--quantize',type=str,default="true", help="Whether to use quantized model or fp32 model")
+    args=parser.parse_args()
+    concurrent_users = args.concurrent
+    total_requests = args.requests
+    quantize = args.quantize == "true"
+
+    asyncio.run(main(concurrent_users, total_requests,quantize))
+    
@@ -0,0 +1,16 @@
+With quantization
+=== Summary ===
+Min latency: 291.97 ms
+Avg latency: 1302.57 ms
+P95 latency: 2581.50 ms
+Max latency: 4830.67 ms
+Positive: 1732, Negative: 1268
+
+Without quantization
+
+=== Summary ===
+Min latency: 331.52 ms
+Avg latency: 3274.15 ms
+P95 latency: 5912.65 ms
+Max latency: 9143.29 ms
+Positive: 1584, Negative: 1198