kiritigowda
diff --git a/‎README.md‎
Lines changed: 3 additions & 0 deletions b/‎README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎include/benchmark_report.h‎
Lines changed: 13 additions & 0 deletions b/‎include/benchmark_report.h‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎scripts/compare_reports.py‎
Lines changed: 56 additions & 0 deletions b/‎scripts/compare_reports.py‎
Lines changed: 56 additions & 0 deletions
@@ -252,6 +252,7 @@ Interpreting `parallelism_efficiency`:
 =============================================================
   Summary: 156 total | 156 passed | 0 skipped | 0 failed
   OpenVX Vision Score: 1586.05 MP/s (156 benchmarks)
+  OpenVX Framework Score: 4.872x (geomean of 18 framework metrics)
   vision Conformance: PASS (41/41)
   vision Top-5 Fastest:
     1. Not                           26835.8 MP/s (graph, FHD)
@@ -281,6 +282,7 @@ Interpreting `parallelism_efficiency`:
 - **OpenVX Vision Score** — Geometric mean of MP/s across all passing graph-mode vision benchmarks
 - **Enhanced Vision Score** — Geometric mean when enhanced_vision benchmarks are included
 - **Category Sub-Scores** — Per-category geometric mean (pixelwise, filters, color, etc.)
+- **OpenVX Framework Score** — Equal-weight geometric mean of all `graph_speedup`, `virtual_dividend`, `parallelism_efficiency`, and `concurrency_speedup` values produced by the framework benchmarks, reported as a dimensionless multiplier (×). **A score above 1.0 means the OpenVX graph framework adds aggregate value over a kernel-only baseline.** Lower-is-better metrics (e.g. `verify_per_node_ms`, `async_overhead_ratio`) are intentionally excluded so that the score has a single, monotonic interpretation. The score is only emitted when framework benchmarks are run (`--feature-set framework` or `--feature-set everything`).
 
 ### Conformance Summary
 
@@ -334,6 +336,7 @@ python3 scripts/compare_reports.py results_vendor_a/benchmark_results.json \
 | **Sustained Ratio** | `min_ns / median_ns`. Values near 1.0 indicate consistent performance; lower values suggest variance from caching, scheduling, or thermal effects. |
 | **Scaling Efficiency** | `(MP/s at high res) / (MP/s at low res)`. 1.0 = perfect scaling; values below 1.0 indicate memory or bandwidth bottlenecks at higher resolutions. |
 | **Vision Score** | Geometric mean of MP/s across all passing graph-mode vision benchmarks. Single-number summary for cross-vendor comparison. |
+| **Framework Score** | Equal-weight geometric mean of all `graph_speedup`, `virtual_dividend`, `parallelism_efficiency`, and `concurrency_speedup` values produced by framework benchmarks, reported as a dimensionless multiplier (×). A score above 1.0 means the OpenVX graph framework adds aggregate value over a kernel-only baseline. Only emitted when framework benchmarks are run. |
 | **Stability Warning** | Flagged when CV% exceeds the stability threshold (default: 15%). Indicates the result may not be reliable — increase iterations or reduce system load. |
 | **Conformance** | Whether all available kernels in a feature set produced valid graph-mode results. PASS = all kernels benchmarked successfully. |
 
 
@@ -16,6 +16,19 @@ struct CompositeScores {
     std::map<std::string, double> category_scores;  // per-category geometric mean
     int vision_count = 0;
     int enhanced_count = 0;
+
+    // Framework Score: dimensionless geometric mean of "higher is better" framework
+    // metrics that capture how much value the OpenVX *graph framework* adds beyond
+    // raw kernel throughput. Aggregates:
+    //   - graph_speedup           (graph_dividend benchmarks)
+    //   - virtual_dividend        (graph_dividend benchmarks)
+    //   - parallelism_efficiency  (parallel_branches benchmarks)
+    //   - concurrency_speedup     (async_streaming benchmarks)
+    // A value >1.0 means the framework adds aggregate value over a kernel-only
+    // baseline. Lower-is-better metrics (verify cost, async overhead) are
+    // intentionally excluded so the score has a single, monotonic interpretation.
+    double framework_score = 0;
+    int framework_metric_count = 0;
 };
 
 // Conformance checking (Feature 7)
 
@@ -147,6 +147,13 @@ def write_markdown(impl_names, result_maps, all_keys, output_path, reports, syst
         if enhanced_a > 0 or enhanced_b > 0:
             f.write(f'| Enhanced Vision Score (MP/s) | {enhanced_a:.2f} | {enhanced_b:.2f} |\n')
 
+        framework_a = scores[0].get('framework_score', 0) if len(scores) > 0 else 0
+        framework_b = scores[1].get('framework_score', 0) if len(scores) > 1 else 0
+        framework_count_a = scores[0].get('framework_metric_count', 0) if len(scores) > 0 else 0
+        framework_count_b = scores[1].get('framework_metric_count', 0) if len(scores) > 1 else 0
+        if framework_count_a > 0 or framework_count_b > 0:
+            f.write(f'| Framework Score (x, geomean) | {framework_a:.3f} | {framework_b:.3f} |\n')
+
         conformance_info = []
         for report in reports:
             conf_list = report.get('conformance', [])
@@ -189,6 +196,55 @@ def write_markdown(impl_names, result_maps, all_keys, output_path, reports, syst
                 f.write(f'| {display} | {a_val:.2f} | {b_val:.2f} | {sign}{change:.1f} |\n')
             f.write('\n')
 
+        # --- Framework Metrics Comparison ---
+        # Group framework metrics by (benchmark name, resolution); union across reports.
+        fw_keys = {}  # key -> display
+        fw_metrics_by_key = {}  # key -> set(metric names)
+        per_side_metrics = [{}, {}]  # side -> key -> {metric_name: dict}
+        for side, rmap in enumerate(result_maps):
+            for (name, mode, resolution), r in rmap.items():
+                fms = r.get('framework_metrics', [])
+                if not fms:
+                    continue
+                key = (name, resolution)
+                fw_keys[key] = f'{name} @ {resolution}'
+                fw_metrics_by_key.setdefault(key, set())
+                per_side_metrics[side].setdefault(key, {})
+                for fm in fms:
+                    nm = fm.get('name')
+                    if not nm:
+                        continue
+                    fw_metrics_by_key[key].add(nm)
+                    per_side_metrics[side][key][nm] = fm
+
+        if fw_keys:
+            f.write('## Framework Metrics Comparison\n\n')
+            f.write(f'> Per-scenario framework metrics (orchestration, scheduling, async, '
+                    f'verification). Higher-is-better metrics show {impl_names[1]}/{impl_names[0]}; '
+                    f'lower-is-better metrics show {impl_names[0]}/{impl_names[1]}. '
+                    f'A ratio >1.00 always means {impl_names[1]} is better.\n\n')
+            f.write(f'| Benchmark @ Resolution | Metric | Unit | {impl_names[0]} | {impl_names[1]} | Ratio | Direction |\n')
+            f.write('|:---|:---|:---|---:|---:|---:|:---|\n')
+            for key in sorted(fw_keys.keys()):
+                display = fw_keys[key]
+                for nm in sorted(fw_metrics_by_key[key]):
+                    a_fm = per_side_metrics[0].get(key, {}).get(nm)
+                    b_fm = per_side_metrics[1].get(key, {}).get(nm)
+                    higher_better = (a_fm or b_fm or {}).get('higher_is_better', True)
+                    unit = (a_fm or b_fm or {}).get('unit', '') or '—'
+                    a_val = a_fm.get('value') if a_fm else None
+                    b_val = b_fm.get('value') if b_fm else None
+                    a_str = f'{a_val:.3f}' if a_val is not None else '—'
+                    b_str = f'{b_val:.3f}' if b_val is not None else '—'
+                    if a_val and b_val and a_val > 0 and b_val > 0:
+                        ratio = (b_val / a_val) if higher_better else (a_val / b_val)
+                        ratio_str = f'{ratio:.2f}'
+                    else:
+                        ratio_str = '—'
+                    direction = 'higher is better' if higher_better else 'lower is better'
+                    f.write(f'| {display} | `{nm}` | {unit} | {a_str} | {b_str} | {ratio_str} | {direction} |\n')
+            f.write('\n')
+
         # --- Build comparison rows (include all results, not just verified) ---
         comparison_rows = []
         for key in all_keys: