kiritigowda
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 5 additions & 5 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎README.md‎
Lines changed: 4 additions & 0 deletions b/‎README.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/features-to-add.md‎
Lines changed: 84 additions & 0 deletions b/‎docs/features-to-add.md‎
Lines changed: 84 additions & 0 deletions
diff --git a/‎include/benchmark_runner.h‎
Lines changed: 4 additions & 0 deletions b/‎include/benchmark_runner.h‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎include/openvx_version.h‎
Lines changed: 39 additions & 0 deletions b/‎include/openvx_version.h‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎include/verify_utils.h‎
Lines changed: 27 additions & 0 deletions b/‎include/verify_utils.h‎
Lines changed: 27 additions & 0 deletions
@@ -7,9 +7,9 @@ on:
     branches: [main]
 
 jobs:
-  benchmark:
+  benchmark-khronos-mivisionx:
     runs-on: ubuntu-latest
-    name: Build, Benchmark & Compare
+    name: Khronos & MIVisionX - Build, Benchmark & Compare
 
     steps:
       - name: Checkout openvx-mark
@@ -49,7 +49,7 @@ jobs:
       # --- MIVisionX (AMD OpenVX) ---
       - name: Build MIVisionX (CPU backend)
         run: |
-          git clone --depth 1 https://github.com/ROCm/MIVisionX.git /tmp/openvx-mivisionx
+          git clone --depth 1 --branch develop https://github.com/ROCm/MIVisionX.git /tmp/openvx-mivisionx
           cd /tmp/openvx-mivisionx && mkdir build && cd build
           cmake -DBACKEND=CPU -DNEURAL_NET=OFF -DLOOM=OFF -DMIGRAPHX=OFF \
                 -DCMAKE_INSTALL_PREFIX=/tmp/openvx-mivisionx/install ..
@@ -88,7 +88,7 @@ jobs:
             exit 0
           fi
 
-          python3 scripts/compare_reports.py "$MIVISIONX" "$KHRONOS" \
+          python3 scripts/compare_reports.py "$KHRONOS" "$MIVISIONX" \
             --output comparison
 
       - name: Post comparison to job summary
@@ -118,6 +118,6 @@ jobs:
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: benchmark-comparison
+          name: benchmark-comparison-khronos-mivisionx
           path: comparison.*
           if-no-files-found: ignore
@@ -98,6 +98,7 @@ set(BENCHMARK_SOURCES
     src/kernel_registry.cpp
     src/test_data_generator.cpp
     src/system_info.cpp
+    src/verify_utils.cpp
     src/benchmarks/node_pixelwise.cpp
     src/benchmarks/node_filters.cpp
     src/benchmarks/node_color.cpp
 
@@ -19,6 +19,10 @@ openvx-mark works with any conformant OpenVX implementation — AMD OpenVX (MIVi
 - **Baseline comparison** — compare JSON reports across runs or vendors
 - **Reports** — JSON, CSV, and Markdown output with glossary
 
+## Important
+
+We recommend that the OpenVX implementation pass the [Khronos OpenVX Conformance Test Suite](https://github.com/KhronosGroup/OpenVX-cts) before you run openvx-mark. Benchmark results are only meaningful when the underlying implementation is conformant — a non-conformant implementation may produce incorrect outputs, which openvx-mark's output verification flags and excludes from composite scores.
+
 ## Prerequisites
 
 - C++17 compiler
 
@@ -0,0 +1,84 @@
+# Features To Add
+
+## Verification Audit Recommendations
+
+Recommendations from a verification audit of all 52 benchmark verify functions.
+
+## Priority 1: Strengthen Filter Tests with Non-Uniform Input
+
+All 8 filter verify functions use constant-value input (all pixels = 100), making them unable to distinguish a working filter from a simple copy or no-op.
+
+**Affected benchmarks:** Box3x3, Gaussian3x3, Median3x3, Erode3x3, Dilate3x3, Sobel3x3, CustomConvolution, NonLinearFilter
+
+**Recommended fixes:**
+- **Box3x3** — Input with a single bright pixel (255) surrounded by zeros. Output center should be ~28 (255/9).
+- **Gaussian3x3** — Input with a single bright pixel (255) surrounded by zeros. Output center should be ~64 (255 × 4/16, the center weight of the 3x3 Gaussian kernel), i.e. well below 255.
+- **Median3x3** — Input with salt-and-pepper noise on a constant background. Isolated noise pixels should be replaced by the background value in the output.
+- **Erode3x3** — Input with an isolated bright pixel in a dark field. Erode should remove it (output = 0 at that position).
+- **Dilate3x3** — Input with an isolated dark pixel in a bright field. Dilate should fill it (output = 255 at that position).
+- **Sobel3x3** — Input with a horizontal edge (top half = 0, bottom half = 255). Verify dy gradient is non-zero at the edge.
+- **CustomConvolution** — Use a non-identity kernel (e.g., edge-detect) and verify output differs from input.
+- **NonLinearFilter** — Use a pattern where min/median/max produce distinct, verifiable results.
+
+## Priority 2: Use Non-Identity Geometric Transforms
+
+WarpAffine, WarpPerspective, and Remap all use identity transforms, so output trivially equals input. A copy operation would pass.
+
+**Affected benchmarks:** WarpAffine, WarpPerspective, Remap
+
+**Recommended fixes:**
+- **WarpAffine** — Use a known translation (e.g., shift by 10 pixels) and verify the pixel value appears at the expected offset.
+- **WarpPerspective** — Use a known simple perspective transform and verify pixel displacement.
+- **Remap** — Use a coordinate mapping that flips or shifts the image and verify output positions.
+
+## Priority 3: Verify Feature Detector Output
+
+HarrisCorners, FastCorners, and OpticalFlowPyrLK only check that graph execution succeeds without verifying detected features.
+
+**Affected benchmarks:** HarrisCorners, FastCorners, OpticalFlowPyrLK
+
+**Recommended fixes:**
+- **HarrisCorners** — Use a checkerboard or cross pattern with obvious corners. Verify the output array is non-empty.
+- **FastCorners** — Same approach. Verify at least one corner is detected on a known pattern.
+- **OpticalFlowPyrLK** — Verify that tracked keypoint positions shift in the expected direction between frames.
+
+## Priority 4: Multi-Pixel Sampling for Single-Pixel Checks
+
+Several tests only check a single output pixel. A bug affecting other regions would go undetected.
+
+**Affected benchmarks:** ChannelExtract, ChannelCombine, Phase, ScaleImage_Half, ScaleImage_Double
+
+**Recommended fixes:**
+- Sample at least 3-4 positions (e.g., center, corners, mid-edges) to verify the operation is consistent across the image.
+
+## Priority 5: Strengthen Remaining Weak Checks
+
+- **LBP** — Currently only checks `imageNonZero`. Should verify specific LBP pattern values for a known input.
+- **EqualizeHist** — Currently checks that all pixels are equal to within ±1. Could additionally verify that the output value matches the expected equalized level for uniform input (should map to ~128 for full-range equalization).
+
+## Comparison Report Enhancements
+
+Status of the comparison report features (available through both the C++ `--compare` path and the Python `compare_reports.py` script):
+
+### Implemented
+
+- **System info section** — Shows CPU, cores, RAM, OS. Detects same vs different hardware with a mismatch warning.
+- **Conformance & Scores table** — Side-by-side Vision Score (geometric mean MP/s), conformance PASS/FAIL with kernel counts.
+- **Category sub-scores** — Per-category geometric mean comparison with % change column.
+- **Summary with per-category breakdown** — Regression/improvement/unchanged counts, broken down by category (e.g., "3 regressions in filters").
+- **Detailed results with MP/s** — Both median latency (ms) and throughput (MP/s) for each implementation, plus change % and status.
+- **Benchmarks only in one report** — Lists benchmarks present in one file but not the other, so nothing is silently dropped.
+- **Stability caveat flags** — Marks rows where either side had CV% > 15%, with a footnote explaining unreliable comparisons.
+- **CSV output from C++** — Generates both `.md` and `.csv` from the C++ `--compare` path.
+- **Vision Score from JSON** — Python script reads precomputed `overall_vision_score` from JSON instead of incorrectly summing MP/s.
+- **Missing kernels detail** — Shows missing kernel lists side by side when conformance differs.
+
+### Future Enhancements
+
+- **Configurable regression threshold** — The 5% threshold for regression/improvement is hardcoded. Add a `--threshold` CLI option to both C++ and Python.
+- **Statistical significance testing** — When iterations > 1, compute confidence intervals or run a t-test to determine whether differences are statistically significant.
+- **Multi-resolution scaling comparison** — Compare scaling efficiency between implementations (how well each handles higher resolutions).
+- **Chart/graph output** — Generate bar charts or SVG visualizations for throughput comparison.
+- **N-way comparison** — Support comparing 3+ implementations in a single report (currently optimized for pairwise).
+- **Grouped-by-category view** — Option to group the detailed results table by category instead of sorting by change %.
+- **Historical trend tracking** — Compare against a series of reports over time to detect gradual regressions.
@@ -33,6 +33,10 @@ struct BenchmarkCase {
         vx_context ctx, uint32_t width, uint32_t height,
         TestDataGenerator& gen, ResourceTracker& tracker)>;
     ImmediateFn immediate_func;
+
+    // Output verification: runs kernel on small known input, checks correctness
+    using VerifyFn = std::function<bool(vx_context ctx)>;
+    VerifyFn verify_fn;
 };
 
 class BenchmarkRunner {
 
@@ -49,4 +49,43 @@
 #define OPENVX_HAS_1_3 0
 #endif
 
+// Compatibility wrappers: OpenVX 1.3 APIs that map to 1.1 equivalents
+#if !OPENVX_HAS_1_3
+
+// OpenVX 1.1 stand-in for the 1.3 vxCreateThresholdForImage(): forwards to
+// vxCreateThreshold() with a fixed VX_TYPE_UINT8 data type. The input and
+// output image formats are accepted for signature compatibility and ignored.
+static inline vx_threshold vxCreateThresholdForImage(
+        vx_context ctx,
+        vx_enum thresh_type,
+        vx_df_image /*in_fmt*/,
+        vx_df_image /*out_fmt*/)
+{
+    const vx_enum data_type = VX_TYPE_UINT8;
+    return vxCreateThreshold(ctx, thresh_type, data_type);
+}
+
+// OpenVX 1.1 stand-in for the 1.3 vxCopyThresholdValue().
+//
+// Only VX_WRITE_ONLY is emulated: value->U8 is widened to vx_int32 and stored
+// through the VX_THRESHOLD_THRESHOLD_VALUE attribute. The memory-type
+// argument is accepted for signature compatibility and ignored.
+//
+// Returns VX_ERROR_NOT_SUPPORTED for any other usage,
+// VX_ERROR_INVALID_PARAMETERS when `value` is null, and otherwise the status
+// reported by vxSetThresholdAttribute().
+static inline vx_status vxCopyThresholdValue(vx_threshold thresh,
+    vx_pixel_value_t *value, vx_enum usage, vx_enum /*mem_type*/) {
+    // Usage is checked first so non-write calls keep returning NOT_SUPPORTED.
+    if (usage != VX_WRITE_ONLY) {
+        return VX_ERROR_NOT_SUPPORTED;
+    }
+    // Reject a missing source value instead of dereferencing it.
+    if (!value) {
+        return VX_ERROR_INVALID_PARAMETERS;
+    }
+    vx_int32 v = value->U8;
+    return vxSetThresholdAttribute(thresh,
+        VX_THRESHOLD_THRESHOLD_VALUE, &v, sizeof(v));
+}
+
+// OpenVX 1.1 stand-in for the 1.3 vxCopyThresholdRange().
+//
+// Only VX_WRITE_ONLY is emulated: lower->U8 and upper->U8 are widened to
+// vx_int32 and stored through VX_THRESHOLD_THRESHOLD_LOWER and
+// VX_THRESHOLD_THRESHOLD_UPPER, in that order. The memory-type argument is
+// accepted for signature compatibility and ignored.
+//
+// Returns VX_ERROR_NOT_SUPPORTED for any other usage,
+// VX_ERROR_INVALID_PARAMETERS when either bound is null, and otherwise the
+// first non-success status from vxSetThresholdAttribute() (or VX_SUCCESS).
+static inline vx_status vxCopyThresholdRange(vx_threshold thresh,
+    vx_pixel_value_t *lower, vx_pixel_value_t *upper,
+    vx_enum usage, vx_enum /*mem_type*/) {
+    // Usage is checked first so non-write calls keep returning NOT_SUPPORTED.
+    if (usage != VX_WRITE_ONLY) {
+        return VX_ERROR_NOT_SUPPORTED;
+    }
+    // Validate both bounds before writing either attribute.
+    if (!lower || !upper) {
+        return VX_ERROR_INVALID_PARAMETERS;
+    }
+    vx_int32 lo = lower->U8, hi = upper->U8;
+    vx_status s = vxSetThresholdAttribute(thresh,
+        VX_THRESHOLD_THRESHOLD_LOWER, &lo, sizeof(lo));
+    if (s != VX_SUCCESS) return s;
+    return vxSetThresholdAttribute(thresh,
+        VX_THRESHOLD_THRESHOLD_UPPER, &hi, sizeof(hi));
+}
+
+// OpenVX 1.1 uses vxSetRemapPoint per pixel instead of vxCopyRemapPatch
+#define OPENVX_USE_SET_REMAP_POINT 1
+
+#else
+#define OPENVX_USE_SET_REMAP_POINT 0
+#endif
+
 #endif // OPENVX_VERSION_H
@@ -0,0 +1,27 @@
+#ifndef VERIFY_UTILS_H
+#define VERIFY_UTILS_H
+
+#include <VX/vx.h>
+#include <cstdint>
+#include <vector>
+
+namespace verify {  // NOTE(review): only declarations are visible; notes below are inferred from names — confirm in src/verify_utils.cpp
+// Presumably builds a w x h image of `format` populated from `data` — TODO confirm layout and ownership.
+vx_image createImage(vx_context ctx, uint32_t w, uint32_t h,
+                     vx_df_image format, const uint8_t* data);
+// Presumably returns the image's pixel data as bytes — TODO confirm.
+std::vector<uint8_t> readImage(vx_image img, uint32_t w, uint32_t h);
+// Presumably the signed 16-bit counterpart of readImage — TODO confirm.
+std::vector<int16_t> readImageS16(vx_image img, uint32_t w, uint32_t h);
+// Compares two byte buffers; tolerance defaults to 0. Presumably a per-element absolute difference — TODO confirm.
+bool compareU8(const std::vector<uint8_t>& actual,
+               const std::vector<uint8_t>& expected, int tolerance = 0);
+// Signed 16-bit counterpart of compareU8, with the same default tolerance of 0.
+bool compareS16(const std::vector<int16_t>& actual,
+                const std::vector<int16_t>& expected, int tolerance = 0);
+// Presumably true when the image contains at least one non-zero pixel — TODO confirm.
+bool imageNonZero(vx_image img, uint32_t w, uint32_t h);
+
+} // namespace verify
+
+#endif // VERIFY_UTILS_H