Skip to content

Commit 0dfea04

Browse files
committed
save work
1 parent 5557d98 commit 0dfea04

9 files changed

Lines changed: 297 additions & 165 deletions

File tree

.github/workflows/pipeline-perf-test-continuous.yml

Lines changed: 2 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -181,161 +181,11 @@ jobs:
181181
182182
- name: Compute scaling efficiency metrics
183183
run: |
184-
python3 << 'EOF'
185-
import json
186-
187-
with open('output-saturation.json') as f:
188-
data = json.load(f)
189-
190-
# Group by cores and protocol - collect throughput and CPU
191-
throughput = {}
192-
cpu_norm = {}
193-
for entry in data:
194-
extra = entry['extra']
195-
parts = extra.split(' - ')
196-
if len(parts) < 3:
197-
continue
198-
199-
cores = parts[2].split('/')[0].split()[0]
200-
protocol = extra.split('/')[1].split(' - ')[0] if '/' in extra else 'unknown'
201-
key = f"{cores}core-{protocol}"
202-
203-
if entry['name'] == 'logs_received_rate':
204-
throughput[key] = entry['value']
205-
elif entry['name'] == 'cpu_percentage_normalized_avg':
206-
cpu_norm[key] = entry['value']
207-
208-
# Calculate scaling metrics
209-
scaling_metrics = []
210-
for protocol in ['OTLP-ATTR-OTLP', 'OTAP-ATTR-OTLP']:
211-
baseline = throughput.get(f"1core-{protocol}", 1)
212-
213-
for cores in ['1', '2', '4', '8']:
214-
key = f"{cores}core-{protocol}"
215-
if key in throughput:
216-
cores_int = int(cores)
217-
actual_speedup = throughput[key] / baseline if baseline > 0 else 0
218-
ideal_speedup = cores_int
219-
efficiency = (actual_speedup / ideal_speedup) * 100 if ideal_speedup > 0 else 0
220-
per_core = throughput[key] / cores_int
221-
222-
# Add efficiency metric (higher is better)
223-
scaling_metrics.append({
224-
"name": "scaling_efficiency",
225-
"unit": "%",
226-
"value": efficiency,
227-
"extra": f"Continuous - Saturation - {cores} Core(s)/{protocol} - Scaling Efficiency"
228-
})
229-
230-
# Add per-core throughput (should remain constant for linear scaling)
231-
scaling_metrics.append({
232-
"name": "per_core_throughput",
233-
"unit": "logs/sec/core",
234-
"value": per_core,
235-
"extra": f"Continuous - Saturation - {cores} Core(s)/{protocol} - Per-Core Throughput"
236-
})
237-
238-
# Add speedup metric
239-
scaling_metrics.append({
240-
"name": "speedup",
241-
"unit": "x",
242-
"value": actual_speedup,
243-
"extra": f"Continuous - Saturation - {cores} Core(s)/{protocol} - Speedup vs 1-core"
244-
})
245-
246-
# Merge with original data
247-
data.extend(scaling_metrics)
248-
249-
with open('output-saturation.json', 'w') as f:
250-
json.dump(data, f, indent=2)
251-
252-
print(f"Added {len(scaling_metrics)} scaling metrics")
253-
EOF
184+
python3 .github/workflows/scripts/compute-scaling-metrics.py output-saturation.json output-saturation.json
254185
255186
- name: Generate scaling analysis summary
256187
run: |
257-
python3 << 'EOF'
258-
import json
259-
import os
260-
261-
with open('output-saturation.json') as f:
262-
data = json.load(f)
263-
264-
# Group metrics by configuration
265-
metrics = {}
266-
for entry in data:
267-
extra = entry['extra']
268-
parts = extra.split(' - ')
269-
if len(parts) < 3:
270-
continue
271-
272-
cores = parts[2].split('/')[0].split()[0]
273-
protocol = extra.split('/')[1].split(' - ')[0] if '/' in extra else 'unknown'
274-
key = f"{cores}core-{protocol}"
275-
276-
if key not in metrics:
277-
metrics[key] = {}
278-
279-
name = entry['name']
280-
if name == 'logs_received_rate':
281-
metrics[key]['throughput'] = entry['value']
282-
elif name == 'cpu_percentage_normalized_avg':
283-
metrics[key]['cpu'] = entry['value']
284-
elif name == 'speedup':
285-
metrics[key]['speedup'] = entry['value']
286-
elif name == 'scaling_efficiency':
287-
metrics[key]['efficiency'] = entry['value']
288-
elif name == 'dropped_logs_percentage':
289-
metrics[key]['dropped'] = entry['value']
290-
291-
# Write to GitHub Step Summary
292-
with open(os.environ['GITHUB_STEP_SUMMARY'], 'a') as f:
293-
f.write("\n## 🚀 Core Scaling Analysis\n\n")
294-
f.write("### OTLP Protocol\n\n")
295-
f.write("| Cores | Throughput (logs/s) | Speedup | Efficiency | CPU % | Dropped % |\n")
296-
f.write("|-------|--------------------:|--------:|-----------:|------:|----------:|\n")
297-
298-
for cores in ['1', '2', '4', '8']:
299-
key = f"{cores}core-OTLP-ATTR-OTLP"
300-
if key in metrics and 'throughput' in metrics[key]:
301-
m = metrics[key]
302-
speedup = m.get('speedup', 0)
303-
efficiency = m.get('efficiency', 0)
304-
cpu = m.get('cpu', 0)
305-
throughput = m.get('throughput', 0)
306-
dropped = m.get('dropped', 0)
307-
308-
# Add status emoji
309-
eff_emoji = "🟢" if efficiency >= 80 else "🟡" if efficiency >= 60 else "🔴"
310-
cpu_emoji = "✅" if cpu >= 90 else "⚠️" if cpu >= 70 else "❌"
311-
312-
f.write(f"| {cores} {eff_emoji} | {throughput:>15,.0f} | {speedup:>5.2f}x | {efficiency:>8.1f}% | {cpu:>4.1f}% {cpu_emoji} | {dropped:>6.2f}% |\n")
313-
314-
f.write("\n### OTAP Protocol\n\n")
315-
f.write("| Cores | Throughput (logs/s) | Speedup | Efficiency | CPU % | Dropped % |\n")
316-
f.write("|-------|--------------------:|--------:|-----------:|------:|----------:|\n")
317-
318-
for cores in ['1', '2', '4', '8']:
319-
key = f"{cores}core-OTAP-ATTR-OTLP"
320-
if key in metrics and 'throughput' in metrics[key]:
321-
m = metrics[key]
322-
speedup = m.get('speedup', 0)
323-
efficiency = m.get('efficiency', 0)
324-
cpu = m.get('cpu', 0)
325-
throughput = m.get('throughput', 0)
326-
dropped = m.get('dropped', 0)
327-
328-
# Add status emoji
329-
eff_emoji = "🟢" if efficiency >= 80 else "🟡" if efficiency >= 60 else "🔴"
330-
cpu_emoji = "✅" if cpu >= 90 else "⚠️" if cpu >= 70 else "❌"
331-
332-
f.write(f"| {cores} {eff_emoji} | {throughput:>15,.0f} | {speedup:>5.2f}x | {efficiency:>8.1f}% | {cpu:>4.1f}% {cpu_emoji} | {dropped:>6.2f}% |\n")
333-
334-
f.write("\n**Legend:**\n")
335-
f.write("- 🟢 Efficiency ≥80% | 🟡 60-80% | 🔴 <60%\n")
336-
f.write("- ✅ CPU ≥90% (saturated) | ⚠️ 70-90% | ❌ <70% (under-utilized)\n")
337-
f.write("- **Ideal Linear Scaling:** Efficiency = 100%, Speedup = # of cores\n\n")
338-
EOF
188+
python3 .github/workflows/scripts/generate-scaling-summary.py output-saturation.json
339189
340190
- name: Update pipeline benchmark data and deploy to GitHub Pages
341191
uses: benchmark-action/github-action-benchmark@d48d326b4ca9ba73ca0cd0d59f108f9e02a381c7
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Compute scaling efficiency metrics from benchmark data.
4+
5+
Reads benchmark JSON data and adds computed metrics:
6+
- scaling_efficiency: How close to ideal linear scaling (100% = perfect)
7+
- per_core_throughput: Throughput divided by number of cores
8+
- speedup: Actual speedup compared to 1-core baseline
9+
10+
Usage:
11+
python compute-scaling-metrics.py <input-file> <output-file>
12+
13+
Example:
14+
python compute-scaling-metrics.py output-saturation.json output-saturation-with-metrics.json
15+
"""
16+
17+
import json
18+
import sys
19+
20+
21+
def compute_scaling_metrics(input_file, output_file):
    """Compute core-scaling metrics from benchmark data and write the merged result.

    Reads a JSON array of benchmark entries (each with ``name``, ``value`` and an
    ``extra`` label), derives three metrics for every (cores, protocol)
    configuration that has a measured throughput, appends them to the original
    entries, and writes the combined list to *output_file* (which may be the
    same path as *input_file*).

    Added metrics:
      - ``scaling_efficiency``: actual speedup / ideal speedup, in percent
        (100% = perfect linear scaling).
      - ``per_core_throughput``: throughput divided by the core count
        (constant under linear scaling).
      - ``speedup``: throughput relative to the protocol's 1-core baseline.

    Args:
        input_file: Path to the benchmark JSON file to read.
        output_file: Path where the merged JSON (original + new metrics) is written.
    """
    with open(input_file) as f:
        data = json.load(f)

    # Index throughput by "<cores>core-<protocol>". The 'extra' field is
    # expected to look like:
    #   "Continuous - Saturation - 2 Core(s)/OTLP-ATTR-OTLP - <metric label>"
    throughput = {}
    for entry in data:
        extra = entry['extra']
        parts = extra.split(' - ')
        if len(parts) < 3:
            continue  # Label doesn't match the saturation format; skip.

        cores = parts[2].split('/')[0].split()[0]
        protocol = extra.split('/')[1].split(' - ')[0] if '/' in extra else 'unknown'
        key = f"{cores}core-{protocol}"

        if entry['name'] == 'logs_received_rate':
            throughput[key] = entry['value']

    # Calculate scaling metrics per protocol.
    scaling_metrics = []
    for protocol in ['OTLP-ATTR-OTLP', 'OTAP-ATTR-OTLP']:
        # Default to 0 (not 1) when the 1-core baseline is missing: the
        # `baseline > 0` guard below then reports speedup/efficiency as 0
        # instead of echoing the raw throughput as a bogus "speedup".
        baseline = throughput.get(f"1core-{protocol}", 0)

        for cores in ['1', '2', '4', '8']:
            key = f"{cores}core-{protocol}"
            if key not in throughput:
                continue

            cores_int = int(cores)
            actual_speedup = throughput[key] / baseline if baseline > 0 else 0
            ideal_speedup = cores_int
            efficiency = (actual_speedup / ideal_speedup) * 100 if ideal_speedup > 0 else 0
            per_core = throughput[key] / cores_int

            # Efficiency metric (higher is better).
            scaling_metrics.append({
                "name": "scaling_efficiency",
                "unit": "%",
                "value": efficiency,
                "extra": f"Continuous - Saturation - {cores} Core(s)/{protocol} - Scaling Efficiency"
            })

            # Per-core throughput (should remain constant for linear scaling).
            scaling_metrics.append({
                "name": "per_core_throughput",
                "unit": "logs/sec/core",
                "value": per_core,
                "extra": f"Continuous - Saturation - {cores} Core(s)/{protocol} - Per-Core Throughput"
            })

            # Speedup relative to the 1-core baseline.
            scaling_metrics.append({
                "name": "speedup",
                "unit": "x",
                "value": actual_speedup,
                "extra": f"Continuous - Saturation - {cores} Core(s)/{protocol} - Speedup vs 1-core"
            })

    # Merge with original data and write out.
    data.extend(scaling_metrics)

    with open(output_file, 'w') as f:
        json.dump(data, f, indent=2)

    print(f"Added {len(scaling_metrics)} scaling metrics")
    print(f"Output written to: {output_file}")
91+
92+
93+
if __name__ == '__main__':
    # CLI entry point: require exactly two positional arguments,
    # the input JSON path and the output JSON path.
    args = sys.argv[1:]
    if len(args) != 2:
        print(__doc__)
        sys.exit(1)

    compute_scaling_metrics(args[0], args[1])
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Generate scaling analysis summary table from benchmark data.
4+
5+
Reads benchmark JSON with computed metrics and generates a markdown summary table
6+
showing throughput, speedup, efficiency, CPU usage, and dropped logs for each configuration.
7+
8+
Usage:
9+
python generate-scaling-summary.py <input-file> [output-file]
10+
11+
Examples:
12+
# Print to console
13+
python generate-scaling-summary.py output-saturation.json
14+
15+
# Write to file
16+
python generate-scaling-summary.py output-saturation.json summary.md
17+
18+
# Append to GitHub Step Summary (CI mode)
19+
GITHUB_STEP_SUMMARY=summary.md python generate-scaling-summary.py output-saturation.json
20+
"""
21+
22+
import json
23+
import sys
24+
import os
25+
26+
27+
def _collect_metrics(data):
    """Group benchmark entries into a dict keyed by "<cores>core-<protocol>"."""
    # Map raw benchmark metric names to the short keys used by the table.
    name_map = {
        'logs_received_rate': 'throughput',
        'cpu_percentage_normalized_avg': 'cpu',
        'speedup': 'speedup',
        'scaling_efficiency': 'efficiency',
        'dropped_logs_percentage': 'dropped',
    }

    metrics = {}
    for entry in data:
        extra = entry['extra']
        parts = extra.split(' - ')
        if len(parts) < 3:
            continue  # Label doesn't match the saturation format; skip.

        # 'extra' looks like: "Continuous - Saturation - 2 Core(s)/OTLP-ATTR-OTLP - ..."
        cores = parts[2].split('/')[0].split()[0]
        protocol = extra.split('/')[1].split(' - ')[0] if '/' in extra else 'unknown'
        key = f"{cores}core-{protocol}"

        short = name_map.get(entry['name'])
        if short:
            metrics.setdefault(key, {})[short] = entry['value']
    return metrics


def _append_protocol_table(lines, metrics, protocol):
    """Append one protocol's markdown scaling table (header + rows) to *lines*."""
    lines.append("| Cores | Throughput (logs/s) | Speedup | Efficiency | CPU % | Dropped % |\n")
    lines.append("|-------|--------------------:|--------:|-----------:|------:|----------:|\n")

    for cores in ['1', '2', '4', '8']:
        m = metrics.get(f"{cores}core-{protocol}", {})
        if 'throughput' not in m:
            continue  # No measured run for this configuration.

        speedup = m.get('speedup', 0)
        efficiency = m.get('efficiency', 0)
        cpu = m.get('cpu', 0)
        throughput = m.get('throughput', 0)
        dropped = m.get('dropped', 0)

        # Status emoji thresholds match the legend appended at the bottom.
        eff_emoji = "🟢" if efficiency >= 80 else "🟡" if efficiency >= 60 else "🔴"
        cpu_emoji = "✅" if cpu >= 90 else "⚠️" if cpu >= 70 else "❌"

        lines.append(f"| {cores} {eff_emoji} | {throughput:>15,.0f} | {speedup:>5.2f}x | {efficiency:>8.1f}% | {cpu:>4.1f}% {cpu_emoji} | {dropped:>6.2f}% |\n")


def generate_summary(input_file, output_file=None):
    """Generate the core-scaling analysis markdown summary.

    Reads benchmark JSON (with computed scaling metrics) from *input_file*
    and emits a markdown table per protocol. Output destination priority:
      1. *output_file* (appended), if given;
      2. the file named by ``$GITHUB_STEP_SUMMARY`` (appended, CI mode);
      3. stdout.

    Args:
        input_file: Path to the benchmark JSON file to read.
        output_file: Optional path to append the markdown summary to.
    """
    with open(input_file) as f:
        data = json.load(f)

    metrics = _collect_metrics(data)

    # Build markdown output. The two protocol sections were previously
    # duplicated inline; they share _append_protocol_table now.
    lines = []
    lines.append("\n## 🚀 Core Scaling Analysis\n")
    lines.append("### OTLP Protocol\n")
    _append_protocol_table(lines, metrics, "OTLP-ATTR-OTLP")

    lines.append("\n### OTAP Protocol\n")
    _append_protocol_table(lines, metrics, "OTAP-ATTR-OTLP")

    lines.append("\n**Legend:**\n")
    lines.append("- 🟢 Efficiency ≥80% | 🟡 60-80% | 🔴 <60%\n")
    lines.append("- ✅ CPU ≥90% (saturated) | ⚠️ 70-90% | ❌ <70% (under-utilized)\n")
    lines.append("- **Ideal Linear Scaling:** Efficiency = 100%, Speedup = # of cores\n\n")

    markdown_output = ''.join(lines)

    # Determine output destination.
    # Priority: 1) explicit output_file, 2) GITHUB_STEP_SUMMARY env var, 3) stdout.
    if output_file:
        with open(output_file, 'a') as f:
            f.write(markdown_output)
        print(f"Summary appended to: {output_file}")
    elif 'GITHUB_STEP_SUMMARY' in os.environ:
        summary_file = os.environ['GITHUB_STEP_SUMMARY']
        with open(summary_file, 'a') as f:
            f.write(markdown_output)
        print(f"Summary appended to GitHub Step Summary: {summary_file}")
    else:
        print(markdown_output)
123+
124+
125+
if __name__ == '__main__':
    # CLI entry point: one required input path, one optional output path.
    argv = sys.argv[1:]
    if not argv:
        print(__doc__)
        sys.exit(1)

    generate_summary(argv[0], argv[1] if len(argv) > 1 else None)

0 commit comments

Comments
 (0)