diff --git a/.github/workflows/conformance.yml b/.github/workflows/conformance.yml index 019b674e..4fe90029 100644 --- a/.github/workflows/conformance.yml +++ b/.github/workflows/conformance.yml @@ -561,8 +561,15 @@ jobs: # remaining Enhanced Vision symbols are exposed as link stubs in # rustVX so the binary can build; they are not exercised here and will # be replaced by real kernels in subsequent phases. + # Enhanced Vision — all implemented kernels. + # + # Currently covers: Copy, NonMaxSuppression, HoughLinesP, + # MatchTemplate, LBP, plus the baseline Min/Max. + # + # All 106 Enhanced Vision tests pass (36 Copy + 33 NMS + 15 HoughLinesP + # + 11 MatchTemplate + 11 LBP + Min/Max). enhanced-vision: - name: "enhanced-vision (Phase 1 — Min/Max)" + name: "enhanced-vision (106/106 passing)" runs-on: ubuntu-22.04 needs: build steps: @@ -573,13 +580,13 @@ jobs: uses: actions/download-artifact@v4 with: name: build-artifacts - - name: Run Enhanced Vision Phase 1 tests + - name: Run Enhanced Vision tests run: | chmod +x OpenVX-cts/build/bin/vx_test_conformance cd OpenVX-cts/build export LD_LIBRARY_PATH=${{ github.workspace }}/target/release export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ - timeout 120 ./bin/vx_test_conformance --filter="Min.*:Max.*" + timeout 300 ./bin/vx_test_conformance --filter="Copy.*:Nonmaxsuppression.*:Houghlinesp.*:MatchTemplate.*:LBP.*:Min.*:Max.*" # Performance benchmark using openvx-mark, comparing rustVX against the # Khronos OpenVX sample implementation on the SAME runner so the two diff --git a/README.md b/README.md index c3ff67d5..0e9d636a 100644 --- a/README.md +++ b/README.md @@ -20,10 +20,10 @@ rustVX passes the full [Khronos OpenVX 1.3 Conformance Test Suite](https://githu |---------|----------------|---------| | OpenVX baseline | 863 | **863 / 863** | | Vision conformance profile | 4957 | **4957 / 4957** | -| Enhanced Vision (`vxMin`, `vxMax`) | 8 | **8 / 8** | -| **Total enabled** | **5828** | **5828 / 5828** | 
+| Enhanced Vision (Phase 2) | 106 | **106 / 106** | +| **Total enabled** | **5926** | **5926 / 5926** | -The remaining Enhanced Vision kernels (`Copy`, `Houghlinesp`, `BilateralFilter`, `NonMaxSuppression`, `MatchTemplate`, `LBP`, `HogCells`, `HogFeatures`, `ControlFlow`/`Select`, `Tensor*`) are tracked as follow-up phases; rustVX currently exposes them as link-only stubs so the CTS binary can be built with `-DOPENVX_USE_ENHANCED_VISION=ON`. The Phase-1 CI job filters strictly to `Min.*:Max.*`. +The remaining Enhanced Vision kernels (`BilateralFilter`, `TensorOp`, `HOGCells`, `HOGFeatures`, `ControlFlow`/`Select`, `ScalarOperation`) are tracked as follow-up phases. All implemented Enhanced Vision kernels are exercised in CI with `-DOPENVX_USE_ENHANCED_VISION=ON`. Latest CTS run results are published on each push and pull request via the [Actions tab](https://github.com/kiritigowda/rustVX/actions). @@ -254,7 +254,7 @@ GitHub Actions builds and runs the full CTS on every push and pull request. 
The | **vision-features** | HarrisCorners, FastCorners, Canny | [![vision-features](https://img.shields.io/github/check-runs/kiritigowda/rustVX/main?nameFilter=vision-features&label=)](https://github.com/kiritigowda/rustVX/actions/workflows/conformance.yml?query=branch%3Amain) | | **vision-statistics** | MeanStdDev, MinMaxLoc, Integral | [![vision-statistics](https://img.shields.io/github/check-runs/kiritigowda/rustVX/main?nameFilter=vision-statistics&label=)](https://github.com/kiritigowda/rustVX/actions/workflows/conformance.yml?query=branch%3Amain) | | **vision-pyramid** | GaussianPyramid, LaplacianPyramid, LaplacianReconstruct, OptFlowPyrLK | [![vision-pyramid](https://img.shields.io/github/check-runs/kiritigowda/rustVX/main?nameFilter=vision-pyramid&label=)](https://github.com/kiritigowda/rustVX/actions/workflows/conformance.yml?query=branch%3Amain) | -| **enhanced-vision** (Phase 1) | Min, Max (Enhanced Vision feature set) | [![enhanced-vision](https://img.shields.io/github/check-runs/kiritigowda/rustVX/main?nameFilter=enhanced-vision&label=)](https://github.com/kiritigowda/rustVX/actions/workflows/conformance.yml?query=branch%3Amain) | +| **enhanced-vision** | Copy, NonMaxSuppression, HoughLinesP, MatchTemplate, LBP, Min, Max (106 tests) | [![enhanced-vision](https://img.shields.io/github/check-runs/kiritigowda/rustVX/main?nameFilter=enhanced-vision&label=)](https://github.com/kiritigowda/rustVX/actions/workflows/conformance.yml?query=branch%3Amain) | See the [Actions tab](https://github.com/kiritigowda/rustVX/actions) for full run history. 
diff --git a/openvx-core/src/c_api.rs b/openvx-core/src/c_api.rs index 2f9fad18..623c4c0b 100644 --- a/openvx-core/src/c_api.rs +++ b/openvx-core/src/c_api.rs @@ -353,6 +353,12 @@ fn register_standard_kernels(context_id: u32) { ("org.khronos.openvx.laplacian_pyramid", 0x2A, 3), ("org.khronos.openvx.laplacian_reconstruct", 0x2B, 3), ("org.khronos.openvx.non_linear_filter", 0x2C, 4), + // Enhanced Vision kernels + ("org.khronos.openvx.copy", 0x35, 3), + ("org.khronos.openvx.non_max_suppression", 0x36, 4), + ("org.khronos.openvx.hough_lines_p", 0x34, 8), + ("org.khronos.openvx.match_template", 0x2D, 4), + ("org.khronos.openvx.lbp", 0x2E, 4), // OpenVX 1.0.2 addition ("org.khronos.openvx.weighted_average", 0x40, 4), // OpenVX 1.1 extensions diff --git a/openvx-core/src/unified_c_api.rs b/openvx-core/src/unified_c_api.rs index ed9a4a5b..fec24e09 100644 --- a/openvx-core/src/unified_c_api.rs +++ b/openvx-core/src/unified_c_api.rs @@ -470,10 +470,21 @@ impl Clone for VxCDelay { /// Tensor data pub struct VxCTensor { - num_dims: usize, - dims: Vec, - data_type: vx_enum, - ref_count: AtomicUsize, + pub num_dims: usize, + pub dims: Vec, + pub data_type: vx_enum, + pub ref_count: AtomicUsize, +} + +impl VxCTensor { + pub fn new(num_dims: usize, dims: Vec, data_type: vx_enum) -> Self { + VxCTensor { + num_dims, + dims, + data_type, + ref_count: AtomicUsize::new(1), + } + } } /// Meta format data @@ -1077,6 +1088,12 @@ pub extern "C" fn vxVerifyGraph(graph: vx_graph) -> vx_status { ("org.khronos.openvx.sobel_3x3", vec![1, 2]), // [input, grad_x, grad_y] ("org.khronos.openvx.laplacian_reconstruct", vec![2]), ("org.khronos.openvx.non_linear_filter", vec![3]), // [input, matrix, border, output] + // Enhanced Vision kernels + ("org.khronos.openvx.copy", vec![1]), // [input, output] - param 1 is output + ("org.khronos.openvx.non_max_suppression", vec![3]), // [input, mask, win_size, output] + ("org.khronos.openvx.hough_lines_p", vec![6, 7]), // [input, rho, theta, threshold, 
line_length, line_gap, lines_array, num_lines] + ("org.khronos.openvx.match_template", vec![3]), // [src, templ, matching_method, output] + ("org.khronos.openvx.lbp", vec![3]), // [input, format, kernel_size, output] // 4-param kernels ("org.khronos.openvx.channel_combine", vec![4]), // [plane0, plane1, plane2, plane3, output] ("org.khronos.openvx.add", vec![3]), // [in1, in2, policy_scalar, output] @@ -2511,14 +2528,15 @@ fn execute_node(node_id: u64) -> Option { // We'll validate required parameters in the dispatch function // For ChannelCombine, all plane params can be null except the output let is_channel_combine = kernel_name.contains("channel_combine"); + let is_nms = kernel_name.contains("non_max_suppression"); if param_ids.is_empty() || (param_ids[0].is_none() && !is_channel_combine) { return Some(VX_ERROR_INVALID_PARAMETERS); } for (idx, param_id_opt) in param_ids.iter().enumerate() { if let Some(param_id) = param_id_opt { - // Validate parameter is not null pointer (unless it's an optional param for ChannelCombine) - if *param_id == 0 && !is_channel_combine { + // Validate parameter is not null pointer (unless it's an optional param for ChannelCombine or non_max_suppression mask) + if *param_id == 0 && !is_channel_combine && !(is_nms && idx == 1) { return Some(VX_ERROR_INVALID_PARAMETERS); } params.push(*param_id as vx_reference); @@ -3138,6 +3156,122 @@ fn dispatch_kernel_with_border_impl( VX_ERROR_INVALID_PARAMETERS } } + // Copy (Enhanced Vision) + "org.khronos.openvx.copy" => { + if params.len() >= 2 { + let input = params[0]; + let output = params[1]; + if !input.is_null() && !output.is_null() { + unsafe { crate::vxu_impl::vxu_copy_impl(input, output) } + } else { + VX_ERROR_INVALID_PARAMETERS + } + } else { + VX_ERROR_INVALID_PARAMETERS + } + } + // NonMaxSuppression (Enhanced Vision) + "org.khronos.openvx.non_max_suppression" => { + if params.len() >= 4 { + let input = params[0] as vx_image; + let mask = params[1] as vx_image; + let win_size = if 
params.len() > 2 && !params[2].is_null() { + params[2] as vx_scalar + } else { + std::ptr::null_mut() + }; + let output = params[3] as vx_image; + if !input.is_null() && !output.is_null() { + crate::vxu_impl::vxu_non_max_suppression_impl( + unsafe { crate::c_api::vxGetContext(input as vx_reference) }, + input, + mask, + win_size, + output, + ) + } else { + VX_ERROR_INVALID_PARAMETERS + } + } else { + VX_ERROR_INVALID_PARAMETERS + } + } + // HoughLinesP (Enhanced Vision) + "org.khronos.openvx.hough_lines_p" => { + if params.len() >= 7 { + let input = params[0] as vx_image; + let rho = params[1] as vx_scalar; + let theta = params[2] as vx_scalar; + let threshold = params[3] as vx_scalar; + let line_length = params[4] as vx_scalar; + let line_gap = params[5] as vx_scalar; + let lines_array = params[6] as vx_array; + if !input.is_null() && !lines_array.is_null() { + crate::vxu_impl::vxu_hough_lines_p_impl( + unsafe { crate::c_api::vxGetContext(input as vx_reference) }, + input, + rho, + theta, + threshold, + line_length, + line_gap, + lines_array, + ) + } else { + VX_ERROR_INVALID_PARAMETERS + } + } else { + VX_ERROR_INVALID_PARAMETERS + } + } + // MatchTemplate (Enhanced Vision) + "org.khronos.openvx.match_template" => { + if params.len() >= 4 { + let src = params[0] as vx_image; + let templ = params[1] as vx_image; + let matching_method = if params.len() > 2 && !params[2].is_null() { + params[2] as vx_scalar + } else { + std::ptr::null_mut() + }; + let output = params[3] as vx_image; + if !src.is_null() && !templ.is_null() && !output.is_null() { + crate::vxu_impl::vxu_match_template_impl( + unsafe { crate::c_api::vxGetContext(src as vx_reference) }, + src, + templ, + matching_method, + output, + ) + } else { + VX_ERROR_INVALID_PARAMETERS + } + } else { + VX_ERROR_INVALID_PARAMETERS + } + } + // LBP (Enhanced Vision) + "org.khronos.openvx.lbp" => { + if params.len() >= 4 { + let input = params[0] as vx_image; + let format = params[1] as vx_scalar; + let kernel_size = 
params[2] as vx_scalar; + let output = params[3] as vx_image; + if !input.is_null() && !output.is_null() { + crate::vxu_impl::vxu_lbp_impl( + unsafe { crate::c_api::vxGetContext(input as vx_reference) }, + input, + format, + kernel_size, + output, + ) + } else { + VX_ERROR_INVALID_PARAMETERS + } + } else { + VX_ERROR_INVALID_PARAMETERS + } + } // Multiply "org.khronos.openvx.multiply" => { if params.len() >= 6 { @@ -4760,8 +4894,8 @@ pub const VX_TYPE_IMAGE: vx_enum = 0x80F; pub const VX_TYPE_REMAP: vx_enum = 0x810; pub const VX_TYPE_META_FORMAT: vx_enum = 0x812; pub const VX_TYPE_OBJECT_ARRAY: vx_enum = 0x813; -pub const VX_TYPE_TENSOR: vx_enum = 0x814; -pub const VX_TYPE_IMPORT: vx_enum = 0x815; +pub const VX_TYPE_TENSOR: vx_enum = 0x815; +pub const VX_TYPE_IMPORT: vx_enum = 0x814; pub const VX_TYPE_TARGET: vx_enum = 0x816; /// Border mode constants (computed using VX_ENUM_BASE formula) @@ -5017,6 +5151,14 @@ pub extern "C" fn vxRegisterPyramidLevelImage( static TENSORS: Lazy>>> = Lazy::new(|| Mutex::new(HashMap::new())); +// Tensor data storage (raw bytes keyed by tensor address) +static TENSOR_DATA: Lazy>>> = + Lazy::new(|| Mutex::new(HashMap::new())); + +// Tensor context association +static TENSOR_CONTEXTS: Lazy>> = + Lazy::new(|| Mutex::new(HashMap::new())); + // Meta format registry static META_FORMATS: Lazy>>> = Lazy::new(|| Mutex::new(HashMap::new())); @@ -8331,13 +8473,64 @@ pub extern "C" fn vxCreateTensor( context: vx_context, num_dims: usize, dims: *const usize, - _data_type: i32, - _fixed_point_pos: i8, + data_type: i32, + fixed_point_pos: i8, ) -> vx_tensor { if context.is_null() || dims.is_null() || num_dims == 0 { return std::ptr::null_mut(); } - std::ptr::null_mut() + + unsafe { + let dims_slice = std::slice::from_raw_parts(dims, num_dims); + let tensor = Box::into_raw(Box::new(VxCTensor::new( + num_dims, + dims_slice.to_vec(), + data_type, + ))); + let addr = tensor as usize; + + if let Ok(mut tensors) = TENSORS.lock() { + tensors.insert(addr, 
Arc::new(VxCTensor::new( + num_dims, + dims_slice.to_vec(), + data_type, + ))); + } + + if let Ok(mut counts) = REFERENCE_COUNTS.lock() { + counts.insert(addr, AtomicUsize::new(1)); + } + if let Ok(mut types) = REFERENCE_TYPES.lock() { + types.insert(addr, VX_TYPE_TENSOR); + } + + // Allocate tensor data buffer + let mut total_elements = 1usize; + for &d in dims_slice { + total_elements = total_elements.saturating_mul(d); + } + let element_size = match data_type { + VX_TYPE_INT8 | VX_TYPE_UINT8 => 1, + VX_TYPE_INT16 | VX_TYPE_UINT16 => 2, + VX_TYPE_INT32 | VX_TYPE_UINT32 | VX_TYPE_FLOAT32 => 4, + VX_TYPE_INT64 | VX_TYPE_UINT64 | VX_TYPE_FLOAT64 => 8, + VX_TYPE_BOOL => 1, + _ => 1, + }; + let total_bytes = total_elements.saturating_mul(element_size); + + if let Ok(mut tensor_data_map) = TENSOR_DATA.lock() { + tensor_data_map.insert(addr, vec![0u8; total_bytes]); + } + + // Create context association (no separate REFERENCES table needed) + let context_id = context as usize as u64; + if let Ok(mut contexts) = TENSOR_CONTEXTS.lock() { + contexts.insert(addr, context_id); + } + + tensor as vx_tensor + } } #[no_mangle] @@ -8356,14 +8549,86 @@ pub extern "C" fn vxCreateTensorFromView( #[no_mangle] pub extern "C" fn vxQueryTensor( tensor: vx_tensor, - _attribute: i32, + attribute: vx_enum, ptr: *mut c_void, - _size: usize, -) -> i32 { + size: vx_size, +) -> vx_status { if tensor.is_null() || ptr.is_null() { - return -2; + return VX_ERROR_INVALID_PARAMETERS; } - -30 + let addr = tensor as usize; + unsafe { + if let Ok(tensors) = TENSORS.lock() { + if let Some(t) = tensors.get(&addr) { + match attribute { + VX_TENSOR_NUMBER_OF_DIMS => { + if size != std::mem::size_of::() { + return VX_ERROR_INVALID_PARAMETERS; + } + *(ptr as *mut vx_size) = t.num_dims; + return VX_SUCCESS; + } + VX_TENSOR_DIMS => { + let bytes_needed = t.num_dims * std::mem::size_of::(); + if size < bytes_needed { + return VX_ERROR_INVALID_PARAMETERS; + } + let dst = std::slice::from_raw_parts_mut(ptr as 
*mut vx_size, t.num_dims); + for i in 0..t.num_dims { + dst[i] = t.dims[i]; + } + return VX_SUCCESS; + } + VX_TENSOR_DATA_TYPE => { + if size != std::mem::size_of::() { + return VX_ERROR_INVALID_PARAMETERS; + } + *(ptr as *mut vx_enum) = t.data_type; + return VX_SUCCESS; + } + VX_TENSOR_FIXED_POINT_POSITION => { + if size != std::mem::size_of::() { + return VX_ERROR_INVALID_PARAMETERS; + } + *(ptr as *mut vx_int8) = 0; + return VX_SUCCESS; + } + _ => return VX_ERROR_NOT_SUPPORTED, + } + } + } + } + VX_ERROR_INVALID_REFERENCE +} + +/// Internal helper: copy tensor data from src to dst. +pub fn copy_tensor_data(src: vx_reference, dst: vx_reference) -> vx_status { + let src_addr = src as usize; + let dst_addr = dst as usize; + + unsafe { + if let Ok(tensors) = TENSORS.lock() { + if let (Some(src_t), Some(dst_t)) = (tensors.get(&src_addr), tensors.get(&dst_addr)) { + if src_t.num_dims != dst_t.num_dims || src_t.data_type != dst_t.data_type { + return VX_ERROR_INVALID_PARAMETERS; + } + for i in 0..src_t.num_dims { + if src_t.dims[i] != dst_t.dims[i] { + return VX_ERROR_INVALID_PARAMETERS; + } + } + if let Ok(data_map) = TENSOR_DATA.lock() { + if let (Some(src_data), Some(dst_data)) = (data_map.get(&src_addr), data_map.get(&dst_addr)) { + if src_data.len() == dst_data.len() { + std::ptr::copy_nonoverlapping(src_data.as_ptr(), dst_data.as_ptr() as *mut u8, src_data.len()); + return VX_SUCCESS; + } + } + } + } + } + } + VX_ERROR_INVALID_PARAMETERS } #[no_mangle] @@ -8423,18 +8688,38 @@ pub extern "C" fn vxUnmapTensorPatch(tensor: vx_tensor, _map_id: usize) -> i32 { #[no_mangle] pub extern "C" fn vxCopyTensorPatch( tensor: vx_tensor, - _number_of_dims: vx_size, + number_of_dims: vx_size, _view_start: *const vx_size, _view_end: *const vx_size, _user_stride: *const vx_size, - _user_ptr: *mut c_void, - _usage: vx_enum, + user_ptr: *mut c_void, + usage: vx_enum, _user_memory_type: vx_enum, ) -> vx_status { - if tensor.is_null() { + if tensor.is_null() || user_ptr.is_null() { 
return VX_ERROR_INVALID_REFERENCE; } - VX_ERROR_NOT_IMPLEMENTED + let addr = tensor as usize; + + unsafe { + if let Ok(tensors) = TENSORS.lock() { + if let Some(t) = tensors.get(&addr) { + if let Ok(tensor_data_map) = TENSOR_DATA.lock() { + if let Some(data) = tensor_data_map.get(&addr) { + let total_bytes = data.len(); + if usage == crate::c_api::VX_WRITE_ONLY { + std::ptr::copy_nonoverlapping(user_ptr, data.as_ptr() as *mut c_void, total_bytes); + } else { + // VX_READ_ONLY or default + std::ptr::copy_nonoverlapping(data.as_ptr() as *const c_void, user_ptr, total_bytes); + } + return VX_SUCCESS; + } + } + } + } + } + VX_ERROR_INVALID_REFERENCE } /// `vxCopyNode` — create a node that copies one OpenVX object to another. @@ -8451,8 +8736,7 @@ pub extern "C" fn vxCopyNode( input: vx_reference, output: vx_reference, ) -> vx_node { - let _ = (graph, input, output); - std::ptr::null_mut() + create_node_with_params(graph, "org.khronos.openvx.copy", &[input, output]) } #[no_mangle] @@ -10558,14 +10842,15 @@ pub extern "C" fn vxCannyEdgeDetectorNode( pub extern "C" fn vxHoughLinesPNode( graph: vx_graph, input: vx_image, - lines_array: vx_array, hough_lines_params: *const vx_hough_lines_p_t, + lines_array: vx_array, + num_lines: vx_scalar, ) -> vx_node { - if graph.is_null() || input.is_null() || lines_array.is_null() || hough_lines_params.is_null() { + if graph.is_null() || input.is_null() || hough_lines_params.is_null() || lines_array.is_null() || num_lines.is_null() { return std::ptr::null_mut(); } - // HoughLinesP has params: input, lines_array, rho, theta, threshold, line_length, line_gap + // HoughLinesP has params: input, rho, theta, threshold, line_length, line_gap, lines_array let context = crate::c_api::vxGetContext(graph as vx_reference); if context.is_null() { return std::ptr::null_mut(); @@ -12990,50 +13275,206 @@ ev_vxu_stub!(vxuBilateralFilter( dst: vx_tensor, )); -ev_node_stub!(vxLBPNode( +#[no_mangle] +pub extern "C" fn vxLBPNode( graph: vx_graph, input: 
vx_image, format: vx_enum, kernel_size: vx_int8, output: vx_image, -)); -ev_vxu_stub!(vxuLBP( - context: vx_context, +) -> vx_node { + if graph.is_null() || input.is_null() || output.is_null() { + return std::ptr::null_mut(); + } + + let context = crate::c_api::vxGetContext(graph as vx_reference); + if context.is_null() { + return std::ptr::null_mut(); + } + + unsafe { + let mut format_scalar = vxCreateScalar( + context, + VX_TYPE_ENUM, + &format as *const _ as *const c_void, + ); + let mut kernel_size_scalar = vxCreateScalar( + context, + VX_TYPE_INT8, + &kernel_size as *const _ as *const c_void, + ); + + if format_scalar.is_null() || kernel_size_scalar.is_null() { + vxReleaseScalar(&mut format_scalar); + vxReleaseScalar(&mut kernel_size_scalar); + return std::ptr::null_mut(); + } + + let node = create_node_with_params( + graph, + "org.khronos.openvx.lbp", + &[ + input as vx_reference, + format_scalar as vx_reference, + kernel_size_scalar as vx_reference, + output as vx_reference, + ], + ); + + vxReleaseScalar(&mut format_scalar); + vxReleaseScalar(&mut kernel_size_scalar); + node + } +} + +#[no_mangle] +pub extern "C" fn vxuLBP( + _context: vx_context, input: vx_image, format: vx_enum, kernel_size: vx_int8, output: vx_image, -)); +) -> vx_status { + unsafe { + let ctx = crate::c_api::vxGetContext(input as vx_reference); + let mut format_scalar = crate::unified_c_api::vxCreateScalarWithSize( + ctx, + crate::c_api::VX_TYPE_ENUM, + &format as *const _ as *const c_void, + std::mem::size_of::(), + ); + let mut kernel_size_scalar = crate::unified_c_api::vxCreateScalarWithSize( + ctx, + crate::c_api::VX_TYPE_INT8, + &kernel_size as *const _ as *const c_void, + std::mem::size_of::(), + ); + let status = crate::vxu_impl::vxu_lbp_impl(ctx, input, format_scalar, kernel_size_scalar, output); + if !format_scalar.is_null() { + crate::c_api_data::vxReleaseScalar(&mut format_scalar); + } + if !kernel_size_scalar.is_null() { + crate::c_api_data::vxReleaseScalar(&mut 
kernel_size_scalar); + } + status + } +} -ev_node_stub!(vxMatchTemplateNode( +#[no_mangle] +pub extern "C" fn vxMatchTemplateNode( graph: vx_graph, src: vx_image, templ: vx_image, matching_method: vx_enum, output: vx_image, -)); -ev_vxu_stub!(vxuMatchTemplate( - context: vx_context, +) -> vx_node { + if graph.is_null() || src.is_null() || templ.is_null() || output.is_null() { + return std::ptr::null_mut(); + } + + let context = crate::c_api::vxGetContext(graph as vx_reference); + if context.is_null() { + return std::ptr::null_mut(); + } + + unsafe { + let mut method_scalar = vxCreateScalar( + context, + VX_TYPE_ENUM, + &matching_method as *const _ as *const c_void, + ); + + if method_scalar.is_null() { + return std::ptr::null_mut(); + } + + let node = create_node_with_params( + graph, + "org.khronos.openvx.match_template", + &[ + src as vx_reference, + templ as vx_reference, + method_scalar as vx_reference, + output as vx_reference, + ], + ); + + vxReleaseScalar(&mut method_scalar); + node + } +} + +#[no_mangle] +pub extern "C" fn vxuMatchTemplate( + _context: vx_context, src: vx_image, templ: vx_image, matching_method: vx_enum, output: vx_image, -)); +) -> vx_status { + if src.is_null() || templ.is_null() || output.is_null() { + return VX_ERROR_INVALID_PARAMETERS; + } + unsafe { + let ctx = crate::c_api::vxGetContext(src as vx_reference); + let mut method_scalar = crate::unified_c_api::vxCreateScalarWithSize( + ctx, + crate::c_api::VX_TYPE_ENUM, + &matching_method as *const _ as *const c_void, + std::mem::size_of::(), + ); + let status = crate::vxu_impl::vxu_match_template_impl(ctx, src, templ, method_scalar, output); + if !method_scalar.is_null() { + crate::c_api_data::vxReleaseScalar(&mut method_scalar); + } + status + } +} -ev_node_stub!(vxNonMaxSuppressionNode( +#[no_mangle] +pub extern "C" fn vxNonMaxSuppressionNode( graph: vx_graph, input: vx_image, mask: vx_image, win_size: vx_int32, output: vx_image, -)); -ev_vxu_stub!(vxuNonMaxSuppression( - context: 
vx_context, - input: vx_image, - mask: vx_image, - win_size: vx_int32, - output: vx_image, -)); +) -> vx_node { + if graph.is_null() || input.is_null() || output.is_null() { + return std::ptr::null_mut(); + } + + let context = crate::c_api::vxGetContext(graph as vx_reference); + if context.is_null() { + return std::ptr::null_mut(); + } + + unsafe { + let mut win_scalar = vxCreateScalar( + context, + VX_TYPE_INT32, + &win_size as *const _ as *const c_void, + ); + + if win_scalar.is_null() { + return std::ptr::null_mut(); + } + + let node = create_node_with_params( + graph, + "org.khronos.openvx.non_max_suppression", + &[ + input as vx_reference, + mask as vx_reference, + win_scalar as vx_reference, + output as vx_reference, + ], + ); + + vxReleaseScalar(&mut win_scalar); + node + } +} +// ev_vxu_stub! removed - replaced with real implementation below ev_node_stub!(vxHOGCellsNode( graph: vx_graph, @@ -13080,14 +13521,95 @@ ev_vxu_stub!(vxuHOGFeatures( // has never been exported. Both stay as stubs in Phase 1; they will be // replaced with real implementations in a follow-up PR (the CTS Copy and // Houghlinesp tests are not in the Phase-1 filter). 
-ev_vxu_stub!(vxuCopy(context: vx_context, input: vx_reference, output: vx_reference)); -ev_vxu_stub!(vxuHoughLinesP( - context: vx_context, +#[no_mangle] +pub extern "C" fn vxuCopy( + _context: vx_context, + input: vx_reference, + output: vx_reference, +) -> vx_status { + unsafe { crate::vxu_impl::vxu_copy_impl(input, output) } +} + +#[no_mangle] +pub extern "C" fn vxuNonMaxSuppression( + _context: vx_context, + input: vx_image, + mask: vx_image, + win_size: vx_int32, + output: vx_image, +) -> vx_status { + unsafe { + let ctx = crate::c_api::vxGetContext(input as vx_reference); + let mut win_scalar = crate::unified_c_api::vxCreateScalarWithSize( + ctx, + crate::c_api::VX_TYPE_INT32, + &win_size as *const _ as *const c_void, + std::mem::size_of::(), + ); + let status = crate::vxu_impl::vxu_non_max_suppression_impl(ctx, input, mask, win_scalar, output); + if !win_scalar.is_null() { + crate::c_api_data::vxReleaseScalar(&mut win_scalar); + } + status + } +} +#[no_mangle] +pub extern "C" fn vxuHoughLinesP( + _context: vx_context, input: vx_image, params: *const vx_hough_lines_p_t, lines_array: vx_array, num_lines: vx_scalar, -)); +) -> vx_status { + unsafe { + let ctx = crate::c_api::vxGetContext(input as vx_reference); + let mut rho_scalar = crate::unified_c_api::vxCreateScalarWithSize( + ctx, + crate::c_api::VX_TYPE_FLOAT32, + &(*params).rho as *const _ as *const c_void, + std::mem::size_of::(), + ); + let mut theta_scalar = crate::unified_c_api::vxCreateScalarWithSize( + ctx, + crate::c_api::VX_TYPE_FLOAT32, + &(*params).theta as *const _ as *const c_void, + std::mem::size_of::(), + ); + let mut threshold_scalar = crate::unified_c_api::vxCreateScalarWithSize( + ctx, + crate::c_api::VX_TYPE_UINT32, + &(*params).threshold as *const _ as *const c_void, + std::mem::size_of::(), + ); + let mut line_length_scalar = crate::unified_c_api::vxCreateScalarWithSize( + ctx, + crate::c_api::VX_TYPE_UINT32, + &(*params).line_length as *const _ as *const c_void, + 
std::mem::size_of::(), + ); + let mut line_gap_scalar = crate::unified_c_api::vxCreateScalarWithSize( + ctx, + crate::c_api::VX_TYPE_UINT32, + &(*params).line_gap as *const _ as *const c_void, + std::mem::size_of::(), + ); + let status = crate::vxu_impl::vxu_hough_lines_p_impl( + ctx, input, + rho_scalar, + theta_scalar, + threshold_scalar, + line_length_scalar, + line_gap_scalar, + lines_array, + ); + if !rho_scalar.is_null() { crate::c_api_data::vxReleaseScalar(&mut rho_scalar); } + if !theta_scalar.is_null() { crate::c_api_data::vxReleaseScalar(&mut theta_scalar); } + if !threshold_scalar.is_null() { crate::c_api_data::vxReleaseScalar(&mut threshold_scalar); } + if !line_length_scalar.is_null() { crate::c_api_data::vxReleaseScalar(&mut line_length_scalar); } + if !line_gap_scalar.is_null() { crate::c_api_data::vxReleaseScalar(&mut line_gap_scalar); } + status + } +} // ---- Control flow ---- ev_node_stub!(vxScalarOperationNode( diff --git a/openvx-core/src/vxu_impl.rs b/openvx-core/src/vxu_impl.rs index 5c3a6b5c..a8f4fd04 100644 --- a/openvx-core/src/vxu_impl.rs +++ b/openvx-core/src/vxu_impl.rs @@ -22,9 +22,12 @@ use crate::c_api::{ vx_enum, vx_float32, vx_image, + vx_int32, + vx_lut, vx_map_id, vx_matrix, vx_pyramid, + vx_reference, vx_scalar, vx_size, vx_status, @@ -37,7 +40,20 @@ use crate::c_api::{ VX_ERROR_INVALID_REFERENCE, VX_ERROR_NOT_IMPLEMENTED, VX_SUCCESS, -}; + VX_TYPE_UINT8, + VX_TYPE_INT8, + VX_TYPE_UINT16, + VX_TYPE_INT16, + VX_TYPE_UINT32, + VX_TYPE_INT32, + VX_TYPE_FLOAT32, + VX_TYPE_UINT64, + VX_TYPE_INT64, + VX_TYPE_FLOAT64, + VX_ARRAY_ITEMSIZE, + VX_THRESHOLD_VALUE, + VX_THRESHOLD_LOWER, + VX_THRESHOLD_UPPER,}; use crate::unified_c_api::{vx_border_t, vx_distribution, vx_remap, VxCImage, VxCPyramid}; use std::ffi::c_void; @@ -299,7 +315,7 @@ impl Image { fn df_image_to_format(df: vx_df_image) -> Option { match df { 0x38303055 => Some(ImageFormat::Gray), // VX_DF_IMAGE_U8 ('U008') - 0x31305555 => Some(ImageFormat::GrayU16), // VX_DF_IMAGE_U16 
('U016') + 0x31305555 => Some(ImageFormat::GrayU16), // VX_DF_IMAGE_U16 ('U016') 0x53313053 => Some(ImageFormat::GrayS16), // VX_DF_IMAGE_S16 ('S016') - CORRECTED 0x36313053 => Some(ImageFormat::GrayS16), // Alternative S16 format code 0x32333055 => Some(ImageFormat::GrayU32), // VX_DF_IMAGE_U32 ('U032') @@ -6744,6 +6760,1295 @@ pub fn vxu_max_impl( } } +/// Enhanced Vision: Copy - copies data from input object to output object +pub fn vxu_copy_impl(input: vx_reference, output: vx_reference) -> vx_status { + if input.is_null() || output.is_null() { + return VX_ERROR_INVALID_REFERENCE; + } + + extern "C" { + fn vxQueryReference(ref_: vx_reference, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxMapImagePatch(image: vx_image, rect: *const c_void, plane_index: u32, map_id: *mut vx_map_id, addr: *mut c_void, ptr: *mut *mut c_void, usage: vx_enum, mem_type: vx_enum, flags: u32) -> vx_status; + fn vxUnmapImagePatch(image: vx_image, map_id: vx_map_id) -> vx_status; + fn vxQueryImage(image: vx_image, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxQueryArray(array: vx_array, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxMapArrayRange(array: vx_array, start: vx_size, end: vx_size, map_id: *mut vx_map_id, stride: *mut vx_size, ptr: *mut *mut c_void, usage: vx_enum, mem_type: vx_enum, flags: u32) -> vx_status; + fn vxUnmapArrayRange(array: vx_array, map_id: vx_map_id) -> vx_status; + fn vxAddArrayItems(array: vx_array, count: vx_size, ptr: *const c_void, stride: vx_size) -> vx_status; + fn vxTruncateArray(array: vx_array, new_num_items: vx_size) -> vx_status; + fn vxQueryScalar(scalar: vx_scalar, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxCopyMatrix(matrix: vx_matrix, ptr: *mut c_void, usage: vx_enum, mem_type: vx_enum) -> vx_status; + fn vxQueryMatrix(matrix: vx_matrix, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn 
vxCopyConvolutionCoefficients(conv: vx_convolution, ptr: *mut c_void, usage: vx_enum, mem_type: vx_enum) -> vx_status; + fn vxQueryConvolution(conv: vx_convolution, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxCopyDistribution(dist: vx_distribution, ptr: *mut c_void, usage: vx_enum, mem_type: vx_enum) -> vx_status; + fn vxQueryDistribution(dist: vx_distribution, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxCopyLUT(lut: vx_lut, ptr: *mut c_void, usage: vx_enum, mem_type: vx_enum) -> vx_status; + fn vxQueryLUT(lut: vx_lut, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxQueryThreshold(thresh: vx_threshold, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxQueryRemap(remap: vx_remap, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxQueryPyramid(pyramid: vx_pyramid, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxGetPyramidLevel(pyramid: vx_pyramid, index: u32) -> vx_image; + fn vxQueryObjectArray(obj_arr: vx_reference, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxGetObjectArrayItem(obj_arr: vx_reference, index: u32) -> vx_reference; + fn vxSetObjectArrayItem(obj_arr: vx_reference, index: u32, item: vx_reference) -> vx_status; + fn vxReleaseReference(ref_ptr: *mut vx_reference) -> vx_status; + fn vxReleaseImage(img: *mut vx_image) -> vx_status; + } + + unsafe { + let mut ref_type: vx_enum = 0; + let status = vxQueryReference(input, crate::unified_c_api::VX_REFERENCE_ATTRIBUTE_TYPE, &mut ref_type as *mut _ as *mut c_void, std::mem::size_of::()); + if status != VX_SUCCESS { + return status; + } + + match ref_type { + // Image copy using vxCopyImagePatch + crate::unified_c_api::VX_TYPE_IMAGE => { + let mut src_format: vx_df_image = 0; + let mut src_width: vx_uint32 = 0; + let mut src_height: vx_uint32 = 0; + let s1 = vxQueryImage(input as vx_image, crate::c_api::VX_IMAGE_FORMAT, &mut 
src_format as *mut _ as *mut c_void, std::mem::size_of::()); + let s2 = vxQueryImage(input as vx_image, crate::c_api::VX_IMAGE_WIDTH, &mut src_width as *mut _ as *mut c_void, std::mem::size_of::()); + let s3 = vxQueryImage(input as vx_image, crate::c_api::VX_IMAGE_HEIGHT, &mut src_height as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS || s2 != VX_SUCCESS || s3 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + let mut dst_format: vx_df_image = 0; + let mut dst_width: vx_uint32 = 0; + let mut dst_height: vx_uint32 = 0; + let d1 = vxQueryImage(output as vx_image, crate::c_api::VX_IMAGE_FORMAT, &mut dst_format as *mut _ as *mut c_void, std::mem::size_of::()); + let d2 = vxQueryImage(output as vx_image, crate::c_api::VX_IMAGE_WIDTH, &mut dst_width as *mut _ as *mut c_void, std::mem::size_of::()); + let d3 = vxQueryImage(output as vx_image, crate::c_api::VX_IMAGE_HEIGHT, &mut dst_height as *mut _ as *mut c_void, std::mem::size_of::()); + if d1 != VX_SUCCESS || d2 != VX_SUCCESS || d3 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + if src_format != dst_format || src_width != dst_width || src_height != dst_height { + return VX_ERROR_INVALID_PARAMETERS; + } + + // For simple U8 images, use map/unmap + let rect = crate::c_api::vx_rectangle_t { + start_x: 0, + start_y: 0, + end_x: src_width as u32, + end_y: src_height as u32, + }; + let mut src_addr: crate::c_api::vx_imagepatch_addressing_t = std::mem::zeroed(); + let mut dst_addr: crate::c_api::vx_imagepatch_addressing_t = std::mem::zeroed(); + let mut src_ptr: *mut c_void = std::ptr::null_mut(); + let mut dst_ptr: *mut c_void = std::ptr::null_mut(); + let mut src_map_id: vx_map_id = 0; + let mut dst_map_id: vx_map_id = 0; + + let s_map = vxMapImagePatch( + input as vx_image, + &rect as *const _ as *const c_void, + 0, + &mut src_map_id, + &mut src_addr as *mut _ as *mut c_void, + &mut src_ptr as *mut *mut c_void, + crate::c_api::VX_READ_ONLY, + 
crate::c_api::VX_MEMORY_TYPE_HOST, + 0, + ); + if s_map != VX_SUCCESS { + return s_map; + } + + let d_map = vxMapImagePatch( + output as vx_image, + &rect as *const _ as *const c_void, + 0, + &mut dst_map_id, + &mut dst_addr as *mut _ as *mut c_void, + &mut dst_ptr as *mut *mut c_void, + crate::c_api::VX_WRITE_ONLY, + crate::c_api::VX_MEMORY_TYPE_HOST, + 0, + ); + if d_map != VX_SUCCESS { + vxUnmapImagePatch(input as vx_image, src_map_id); + return d_map; + } + + // Copy the data + let row_bytes = src_addr.stride_x * src_width as i32; + if row_bytes > 0 { + for y in 0..src_height { + let src_row = (src_ptr as *mut u8).wrapping_add((y as i32 * src_addr.stride_y) as usize); + let dst_row = (dst_ptr as *mut u8).wrapping_add((y as i32 * dst_addr.stride_y) as usize); + std::ptr::copy_nonoverlapping(src_row, dst_row, row_bytes as usize); + } + } + + vxUnmapImagePatch(output as vx_image, dst_map_id); + vxUnmapImagePatch(input as vx_image, src_map_id); + VX_SUCCESS + } + + // Array copy + crate::unified_c_api::VX_TYPE_ARRAY => { + let mut item_type: vx_enum = 0; + let mut num_items: vx_size = 0; + let mut capacity: vx_size = 0; + let s1 = vxQueryArray(input as vx_array, crate::c_api::VX_ARRAY_ITEMTYPE, &mut item_type as *mut _ as *mut c_void, std::mem::size_of::()); + let s2 = vxQueryArray(input as vx_array, crate::c_api::VX_ARRAY_NUMITEMS, &mut num_items as *mut _ as *mut c_void, std::mem::size_of::()); + let s3 = vxQueryArray(input as vx_array, crate::c_api::VX_ARRAY_CAPACITY, &mut capacity as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS || s2 != VX_SUCCESS || s3 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + let mut dst_item_type: vx_enum = 0; + let mut dst_capacity: vx_size = 0; + let d1 = vxQueryArray(output as vx_array, crate::c_api::VX_ARRAY_ITEMTYPE, &mut dst_item_type as *mut _ as *mut c_void, std::mem::size_of::()); + let d2 = vxQueryArray(output as vx_array, crate::c_api::VX_ARRAY_CAPACITY, &mut dst_capacity as *mut _ as 
*mut c_void, std::mem::size_of::()); + if d1 != VX_SUCCESS || d2 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + if item_type != dst_item_type { + return VX_ERROR_INVALID_PARAMETERS; + } + + if num_items > dst_capacity { + return VX_ERROR_INVALID_PARAMETERS; + } + + // Query item size directly from the array + let mut item_size: vx_size = 0; + let sz1 = vxQueryArray(input as vx_array, crate::c_api::VX_ARRAY_ITEMSIZE, &mut item_size as *mut _ as *mut c_void, std::mem::size_of::()); + if sz1 != VX_SUCCESS || item_size == 0 { + return VX_ERROR_INVALID_PARAMETERS; + } + + if num_items == 0 { + // Just truncate destination to 0 + vxTruncateArray(output as vx_array, 0) + } else { + let mut src_map_id: vx_map_id = 0; + let mut src_stride: vx_size = 0; + let mut src_ptr: *mut c_void = std::ptr::null_mut(); + let s_map = vxMapArrayRange( + input as vx_array, + 0, + num_items, + &mut src_map_id, + &mut src_stride, + &mut src_ptr, + crate::c_api::VX_READ_ONLY, + crate::c_api::VX_MEMORY_TYPE_HOST, + 0, + ); + if s_map != VX_SUCCESS { + return s_map; + } + + // Truncate destination and add items + let t1 = vxTruncateArray(output as vx_array, 0); + if t1 != VX_SUCCESS { + vxUnmapArrayRange(input as vx_array, src_map_id); + return t1; + } + + let add_status = vxAddArrayItems(output as vx_array, num_items, src_ptr, src_stride); + vxUnmapArrayRange(input as vx_array, src_map_id); + add_status + } + } + + // Scalar copy - read value from input, write to output + crate::unified_c_api::VX_TYPE_SCALAR => { + let mut data_type: vx_enum = 0; + let s1 = vxQueryScalar(input as vx_scalar, crate::c_api::VX_SCALAR_TYPE, &mut data_type as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + let mut dst_data_type: vx_enum = 0; + let d1 = vxQueryScalar(output as vx_scalar, crate::c_api::VX_SCALAR_TYPE, &mut dst_data_type as *mut _ as *mut c_void, std::mem::size_of::()); + if d1 != VX_SUCCESS || data_type != 
dst_data_type { + return VX_ERROR_INVALID_PARAMETERS; + } + + // Read input scalar value using internal data access + let src_val = { + let s = &*(input as *const crate::c_api_data::VxCScalarData); + s.data.clone() + }; + + // Write to output scalar + { + let d = &mut *(output as *mut crate::c_api_data::VxCScalarData); + d.data = src_val; + } + VX_SUCCESS + } + + // Matrix copy + crate::unified_c_api::VX_TYPE_MATRIX => { + let mut rows: vx_size = 0; + let mut cols: vx_size = 0; + let mut data_type: vx_enum = 0; + let s1 = vxQueryMatrix(input as vx_matrix, crate::unified_c_api::VX_MATRIX_ROWS, &mut rows as *mut _ as *mut c_void, std::mem::size_of::()); + let s2 = vxQueryMatrix(input as vx_matrix, crate::unified_c_api::VX_MATRIX_COLUMNS, &mut cols as *mut _ as *mut c_void, std::mem::size_of::()); + let s3 = vxQueryMatrix(input as vx_matrix, crate::unified_c_api::VX_MATRIX_TYPE, &mut data_type as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS || s2 != VX_SUCCESS || s3 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + let mut dst_rows: vx_size = 0; + let mut dst_cols: vx_size = 0; + let mut dst_type: vx_enum = 0; + let d1 = vxQueryMatrix(output as vx_matrix, crate::unified_c_api::VX_MATRIX_ROWS, &mut dst_rows as *mut _ as *mut c_void, std::mem::size_of::()); + let d2 = vxQueryMatrix(output as vx_matrix, crate::unified_c_api::VX_MATRIX_COLUMNS, &mut dst_cols as *mut _ as *mut c_void, std::mem::size_of::()); + let d3 = vxQueryMatrix(output as vx_matrix, crate::unified_c_api::VX_MATRIX_TYPE, &mut dst_type as *mut _ as *mut c_void, std::mem::size_of::()); + if d1 != VX_SUCCESS || d2 != VX_SUCCESS || d3 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + if rows != dst_rows || cols != dst_cols || data_type != dst_type { + return VX_ERROR_INVALID_PARAMETERS; + } + + let elem_size = if data_type == VX_TYPE_INT8 || data_type == VX_TYPE_UINT8 { + 1 + } else if data_type == VX_TYPE_INT16 || data_type == VX_TYPE_UINT16 { + 2 + } else if 
data_type == VX_TYPE_INT32 || data_type == VX_TYPE_UINT32 || data_type == VX_TYPE_FLOAT32 { + 4 + } else if data_type == VX_TYPE_FLOAT64 { + 8 + } else { + return VX_ERROR_INVALID_PARAMETERS; + }; + + let total_size = rows * cols * elem_size; + let mut buffer = vec![0u8; total_size]; + let read_status = vxCopyMatrix(input as vx_matrix, buffer.as_mut_ptr() as *mut c_void, crate::c_api::VX_READ_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST); + if read_status != VX_SUCCESS { + return read_status; + } + vxCopyMatrix(output as vx_matrix, buffer.as_mut_ptr() as *mut c_void, crate::c_api::VX_WRITE_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST) + } + + // Convolution copy + crate::unified_c_api::VX_TYPE_CONVOLUTION => { + let mut rows: vx_size = 0; + let mut cols: vx_size = 0; + let s1 = vxQueryConvolution(input as vx_convolution, crate::unified_c_api::VX_CONVOLUTION_ROWS, &mut rows as *mut _ as *mut c_void, std::mem::size_of::()); + let s2 = vxQueryConvolution(input as vx_convolution, crate::unified_c_api::VX_CONVOLUTION_COLUMNS, &mut cols as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS || s2 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + let mut dst_rows: vx_size = 0; + let mut dst_cols: vx_size = 0; + let d1 = vxQueryConvolution(output as vx_convolution, crate::unified_c_api::VX_CONVOLUTION_ROWS, &mut dst_rows as *mut _ as *mut c_void, std::mem::size_of::()); + let d2 = vxQueryConvolution(output as vx_convolution, crate::unified_c_api::VX_CONVOLUTION_COLUMNS, &mut dst_cols as *mut _ as *mut c_void, std::mem::size_of::()); + if d1 != VX_SUCCESS || d2 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + if rows != dst_rows || cols != dst_cols { + return VX_ERROR_INVALID_PARAMETERS; + } + + let total_size = rows * cols * std::mem::size_of::(); + let mut buffer = vec![0u8; total_size]; + let read_status = vxCopyConvolutionCoefficients(input as vx_convolution, buffer.as_mut_ptr() as *mut c_void, crate::c_api::VX_READ_ONLY, 
crate::c_api::VX_MEMORY_TYPE_HOST); + if read_status != VX_SUCCESS { + return read_status; + } + vxCopyConvolutionCoefficients(output as vx_convolution, buffer.as_mut_ptr() as *mut c_void, crate::c_api::VX_WRITE_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST) + } + + // Distribution copy + crate::unified_c_api::VX_TYPE_DISTRIBUTION => { + let mut num_bins: vx_size = 0; + let s1 = vxQueryDistribution(input as vx_distribution, crate::unified_c_api::VX_DISTRIBUTION_BINS, &mut num_bins as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + let mut dst_bins: vx_size = 0; + let d1 = vxQueryDistribution(output as vx_distribution, crate::unified_c_api::VX_DISTRIBUTION_BINS, &mut dst_bins as *mut _ as *mut c_void, std::mem::size_of::()); + if d1 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + if num_bins != dst_bins { + return VX_ERROR_INVALID_PARAMETERS; + } + + let total_size = num_bins * std::mem::size_of::(); + let mut buffer = vec![0u8; total_size]; + let read_status = vxCopyDistribution(input as vx_distribution, buffer.as_mut_ptr() as *mut c_void, crate::c_api::VX_READ_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST); + if read_status != VX_SUCCESS { + return read_status; + } + vxCopyDistribution(output as vx_distribution, buffer.as_mut_ptr() as *mut c_void, crate::c_api::VX_WRITE_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST) + } + + // LUT copy + crate::unified_c_api::VX_TYPE_LUT => { + let mut num_items: vx_size = 0; + let s1 = vxQueryLUT(input as vx_lut, crate::unified_c_api::VX_LUT_COUNT, &mut num_items as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + let mut dst_items: vx_size = 0; + let d1 = vxQueryLUT(output as vx_lut, crate::unified_c_api::VX_LUT_COUNT, &mut dst_items as *mut _ as *mut c_void, std::mem::size_of::()); + if d1 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + if num_items != dst_items { + return 
VX_ERROR_INVALID_PARAMETERS; + } + + let total_size = num_items * std::mem::size_of::(); + let mut buffer = vec![0u8; total_size]; + let read_status = vxCopyLUT(input as vx_lut, buffer.as_mut_ptr() as *mut c_void, crate::c_api::VX_READ_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST); + if read_status != VX_SUCCESS { + return read_status; + } + vxCopyLUT(output as vx_lut, buffer.as_mut_ptr() as *mut c_void, crate::c_api::VX_WRITE_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST) + } + + // Threshold copy — use direct struct access, vxQueryThreshold does not support VALUE/LOWER/UPPER + crate::unified_c_api::VX_TYPE_THRESHOLD => { + let src = &*(input as *const crate::c_api_data::VxCThresholdData); + let dst = &mut *(output as *mut crate::c_api_data::VxCThresholdData); + if src.thresh_type != dst.thresh_type { + return VX_ERROR_INVALID_PARAMETERS; + } + dst.value = src.value; + dst.lower = src.lower; + dst.upper = src.upper; + dst.true_value = src.true_value; + dst.false_value = src.false_value; + VX_SUCCESS + } + + // Remap copy + crate::unified_c_api::VX_TYPE_REMAP => { + let mut src_width: vx_size = 0; + let mut src_height: vx_size = 0; + let mut dst_width: vx_size = 0; + let mut dst_height: vx_size = 0; + let s1 = vxQueryRemap(input as vx_remap, crate::unified_c_api::VX_REMAP_SOURCE_WIDTH, &mut src_width as *mut _ as *mut c_void, std::mem::size_of::()); + let s2 = vxQueryRemap(input as vx_remap, crate::unified_c_api::VX_REMAP_SOURCE_HEIGHT, &mut src_height as *mut _ as *mut c_void, std::mem::size_of::()); + let s3 = vxQueryRemap(input as vx_remap, crate::unified_c_api::VX_REMAP_DESTINATION_WIDTH, &mut dst_width as *mut _ as *mut c_void, std::mem::size_of::()); + let s4 = vxQueryRemap(input as vx_remap, crate::unified_c_api::VX_REMAP_DESTINATION_HEIGHT, &mut dst_height as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS || s2 != VX_SUCCESS || s3 != VX_SUCCESS || s4 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + let mut d_src_width: vx_size = 0; + 
let mut d_src_height: vx_size = 0; + let mut d_dst_width: vx_size = 0; + let mut d_dst_height: vx_size = 0; + let d1 = vxQueryRemap(output as vx_remap, crate::unified_c_api::VX_REMAP_SOURCE_WIDTH, &mut d_src_width as *mut _ as *mut c_void, std::mem::size_of::()); + let d2 = vxQueryRemap(output as vx_remap, crate::unified_c_api::VX_REMAP_SOURCE_HEIGHT, &mut d_src_height as *mut _ as *mut c_void, std::mem::size_of::()); + let d3 = vxQueryRemap(output as vx_remap, crate::unified_c_api::VX_REMAP_DESTINATION_WIDTH, &mut d_dst_width as *mut _ as *mut c_void, std::mem::size_of::()); + let d4 = vxQueryRemap(output as vx_remap, crate::unified_c_api::VX_REMAP_DESTINATION_HEIGHT, &mut d_dst_height as *mut _ as *mut c_void, std::mem::size_of::()); + if d1 != VX_SUCCESS || d2 != VX_SUCCESS || d3 != VX_SUCCESS || d4 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + if src_width != d_src_width || src_height != d_src_height || dst_width != d_dst_width || dst_height != d_dst_height { + return VX_ERROR_INVALID_PARAMETERS; + } + + // Use direct internal access for remap data + { + let src = &*(input as *const crate::unified_c_api::VxCRemap); + let dst = &mut *(output as *mut crate::unified_c_api::VxCRemap); + if let Ok(mut dst_map) = dst.map_data.try_write() { + if let Ok(src_map) = src.map_data.try_read() { + dst_map.clone_from(&*src_map); + } + } + } + VX_SUCCESS + } + + // Pyramid copy + crate::unified_c_api::VX_TYPE_PYRAMID => { + let mut levels: vx_size = 0; + let s1 = vxQueryPyramid(input as vx_pyramid, crate::unified_c_api::VX_PYRAMID_LEVELS, &mut levels as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + let mut dst_levels: vx_size = 0; + let d1 = vxQueryPyramid(output as vx_pyramid, crate::unified_c_api::VX_PYRAMID_LEVELS, &mut dst_levels as *mut _ as *mut c_void, std::mem::size_of::()); + if d1 != VX_SUCCESS || levels != dst_levels { + return VX_ERROR_INVALID_PARAMETERS; + } + + for i in 
0..levels as u32 { + let src_level = vxGetPyramidLevel(input as vx_pyramid, i); + let dst_level = vxGetPyramidLevel(output as vx_pyramid, i); + if src_level.is_null() || dst_level.is_null() { + return VX_ERROR_INVALID_REFERENCE; + } + let level_status = vxu_copy_impl(src_level as vx_reference, dst_level as vx_reference); + let mut s = src_level; + let mut d = dst_level; + vxReleaseImage(&mut s); + vxReleaseImage(&mut d); + if level_status != VX_SUCCESS { + return level_status; + } + } + VX_SUCCESS + } + + // ObjectArray copy + crate::unified_c_api::VX_TYPE_OBJECT_ARRAY => { + let mut count: vx_size = 0; + let s1 = vxQueryObjectArray(input, crate::unified_c_api::VX_OBJECT_ARRAY_NUMITEMS, &mut count as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + for i in 0..count { + let src_item = vxGetObjectArrayItem(input, i as u32); + if src_item.is_null() { + return VX_ERROR_INVALID_PARAMETERS; + } + let set_status = vxSetObjectArrayItem(output, i as u32, src_item); + let mut r = src_item; + vxReleaseReference(&mut r); + if set_status != VX_SUCCESS { + return set_status; + } + } + VX_SUCCESS + } + + // Tensor copy + crate::unified_c_api::VX_TYPE_TENSOR => { + crate::unified_c_api::copy_tensor_data(input, output) + } + + _ => VX_ERROR_NOT_IMPLEMENTED, + } + } +}

/// Enhanced Vision: NonMaxSuppression
///
/// Copies every local maximum of `input` into `output` and overwrites every
/// other examined pixel with the lowest value of the format (`0` for U8,
/// `i16::MIN` for S16).
///
/// * `win_size` must be a scalar backed by exactly four bytes that decode to a
///   positive, odd window size.
/// * `mask` is optional (may be null). A pixel whose mask byte is non-zero is
///   copied through unchanged, and neighbours whose mask byte is non-zero are
///   ignored when deciding whether a pixel is a maximum.
/// * Only pixels at least `win_size / 2` away from the edges of the input's
///   valid region are examined; all other output pixels are left untouched.
/// * Ties: an equal neighbour that comes later in raster order (same row to the
///   right, or any row below) suppresses the pixel; an equal neighbour that
///   comes earlier does not.
///
/// Returns `VX_ERROR_INVALID_PARAMETERS` for null handles, a malformed window
/// scalar, failed image queries or mismatching input/output formats,
/// `VX_ERROR_INVALID_FORMAT` when the format is neither U8 nor S16, and
/// otherwise forwards the first failing status of the valid-region query or of
/// any image mapping (including the mask's).
pub fn vxu_non_max_suppression_impl(_ctx: vx_context, input: vx_image, mask: vx_image, win_size: vx_scalar, output: vx_image) -> vx_status {
    /// Base pointer and byte strides of one mapped image plane.
    #[derive(Clone, Copy)]
    struct Plane {
        base: *mut u8,
        stride_x: i32,
        stride_y: i32,
    }

    impl Plane {
        /// Address of pixel `(x, y)`, computed from byte strides. Computing the
        /// address is safe; dereferencing it is the caller's responsibility.
        fn at<T>(&self, x: i32, y: i32) -> *mut T {
            self.base
                .wrapping_add((y * self.stride_y) as usize)
                .wrapping_add((x * self.stride_x) as usize) as *mut T
        }
    }

    /// Reports whether the optional mask is non-zero at `(x, y)`.
    ///
    /// # Safety
    /// `(x, y)` must lie inside the mapped mask patch.
    unsafe fn is_masked(mask: Option<Plane>, x: i32, y: i32) -> bool {
        match mask {
            // SAFETY: guaranteed by the caller, see above.
            Some(plane) => unsafe { *plane.at::<u8>(x, y) != 0 },
            None => false,
        }
    }

    /// Runs the suppression over `xs` x `ys` for pixels of type `T`.
    ///
    /// The iteration order (columns outermost) matches the original kernel.
    ///
    /// # Safety
    /// Every coordinate in `xs` x `ys`, widened by `border` in each direction,
    /// must lie inside all mapped planes, and the planes must hold `T` pixels.
    unsafe fn suppress<T: Copy + Into<i32>>(
        src: Plane,
        dst: Plane,
        mask: Option<Plane>,
        xs: std::ops::Range<i32>,
        ys: std::ops::Range<i32>,
        border: i32,
        suppressed: T,
    ) {
        for x in xs {
            for y in ys.clone() {
                // SAFETY: guaranteed by the caller, see above.
                unsafe {
                    let center: T = *src.at::<T>(x, y);
                    let center_val: i32 = center.into();
                    let out = dst.at::<T>(x, y);

                    // Masked pixels bypass suppression entirely.
                    if is_masked(mask, x, y) {
                        *out = center;
                        continue;
                    }

                    let mut is_peak = true;
                    'window: for i in -border..=border {
                        for j in -border..=border {
                            let (nx, ny) = (x + i, y + j);
                            if is_masked(mask, nx, ny) {
                                continue;
                            }
                            let neighbor: i32 = (*src.at::<T>(nx, ny)).into();
                            // `i` is the column offset, `j` the row offset.
                            // "Before" covers rows above and the same row up to
                            // and including the centre; everything else is
                            // "after" and uses the stricter comparison.
                            let before = j < 0 || (j == 0 && i <= 0);
                            let beaten = if before {
                                center_val < neighbor
                            } else {
                                center_val <= neighbor
                            };
                            if beaten {
                                is_peak = false;
                                break 'window;
                            }
                        }
                    }

                    *out = if is_peak { center } else { suppressed };
                }
            }
        }
    }

    extern "C" {
        fn vxQueryImage(image: vx_image, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status;
        fn vxMapImagePatch(image: vx_image, rect: *const c_void, plane_index: u32, map_id: *mut vx_map_id, addr: *mut c_void, ptr: *mut *mut c_void, usage: vx_enum, mem_type: vx_enum, flags: u32) -> vx_status;
        fn vxUnmapImagePatch(image: vx_image, map_id: vx_map_id) -> vx_status;
        fn vxGetValidRegionImage(image: vx_image, rect: *mut c_void) -> vx_status;
    }

    if input.is_null() || output.is_null() || win_size.is_null() {
        return VX_ERROR_INVALID_PARAMETERS;
    }

    // SAFETY: the handles were null-checked above; the rest relies on them
    // being live objects created by this implementation, which the C API
    // contract requires of the caller.
    unsafe {
        // Read win_size straight from the scalar's byte storage (INT32 is 4 bytes).
        let ws_data = &*(win_size as *const crate::c_api_data::VxCScalarData);
        if ws_data.data.len() != 4 {
            return VX_ERROR_INVALID_PARAMETERS;
        }
        let wsize = i32::from_ne_bytes([ws_data.data[0], ws_data.data[1], ws_data.data[2], ws_data.data[3]]);
        if wsize <= 0 || wsize % 2 == 0 {
            return VX_ERROR_INVALID_PARAMETERS;
        }
        let border = wsize / 2;

        // Input geometry and format.
        let mut src_width: vx_uint32 = 0;
        let mut src_height: vx_uint32 = 0;
        let mut src_format: vx_df_image = 0;
        let s1 = vxQueryImage(input, crate::c_api::VX_IMAGE_WIDTH, &mut src_width as *mut _ as *mut c_void, std::mem::size_of::<vx_uint32>());
        let s2 = vxQueryImage(input, crate::c_api::VX_IMAGE_HEIGHT, &mut src_height as *mut _ as *mut c_void, std::mem::size_of::<vx_uint32>());
        let s3 = vxQueryImage(input, crate::c_api::VX_IMAGE_FORMAT, &mut src_format as *mut _ as *mut c_void, std::mem::size_of::<vx_df_image>());
        if s1 != VX_SUCCESS || s2 != VX_SUCCESS || s3 != VX_SUCCESS {
            return VX_ERROR_INVALID_PARAMETERS;
        }

        // The output must use the same pixel format as the input.
        let mut dst_format: vx_df_image = 0;
        let d3 = vxQueryImage(output, crate::c_api::VX_IMAGE_FORMAT, &mut dst_format as *mut _ as *mut c_void, std::mem::size_of::<vx_df_image>());
        if d3 != VX_SUCCESS || src_format != dst_format {
            return VX_ERROR_INVALID_PARAMETERS;
        }

        // The scan is confined to the input's valid region.
        let mut src_rect: crate::c_api::vx_rectangle_t = std::mem::zeroed();
        let vr = vxGetValidRegionImage(input, &mut src_rect as *mut _ as *mut c_void);
        if vr != VX_SUCCESS {
            return vr;
        }

        // All images are mapped over the full input extent.
        let rect = crate::c_api::vx_rectangle_t {
            start_x: 0,
            start_y: 0,
            end_x: src_width,
            end_y: src_height,
        };

        let mut src_addr: crate::c_api::vx_imagepatch_addressing_t = std::mem::zeroed();
        let mut src_ptr: *mut c_void = std::ptr::null_mut();
        let mut src_map_id: vx_map_id = 0;
        let s_map = vxMapImagePatch(input, &rect as *const _ as *const c_void, 0, &mut src_map_id, &mut src_addr as *mut _ as *mut c_void, &mut src_ptr as *mut *mut c_void, crate::c_api::VX_READ_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST, 0);
        if s_map != VX_SUCCESS {
            return s_map;
        }

        let mut dst_addr: crate::c_api::vx_imagepatch_addressing_t = std::mem::zeroed();
        let mut dst_ptr: *mut c_void = std::ptr::null_mut();
        let mut dst_map_id: vx_map_id = 0;
        let d_map = vxMapImagePatch(output, &rect as *const _ as *const c_void, 0, &mut dst_map_id, &mut dst_addr as *mut _ as *mut c_void, &mut dst_ptr as *mut *mut c_void, crate::c_api::VX_WRITE_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST, 0);
        if d_map != VX_SUCCESS {
            vxUnmapImagePatch(input, src_map_id);
            return d_map;
        }

        // Optional mask. A mask that was supplied but cannot be mapped is an
        // error: silently continuing would produce unmasked results.
        let mut mask_map_id: vx_map_id = 0;
        let mask_plane = if mask.is_null() {
            None
        } else {
            let mut mask_addr: crate::c_api::vx_imagepatch_addressing_t = std::mem::zeroed();
            let mut mask_ptr: *mut c_void = std::ptr::null_mut();
            let m_map = vxMapImagePatch(mask, &rect as *const _ as *const c_void, 0, &mut mask_map_id, &mut mask_addr as *mut _ as *mut c_void, &mut mask_ptr as *mut *mut c_void, crate::c_api::VX_READ_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST, 0);
            if m_map != VX_SUCCESS {
                vxUnmapImagePatch(output, dst_map_id);
                vxUnmapImagePatch(input, src_map_id);
                return m_map;
            }
            Some(Plane {
                base: mask_ptr as *mut u8,
                stride_x: mask_addr.stride_x,
                stride_y: mask_addr.stride_y,
            })
        };

        let src_plane = Plane {
            base: src_ptr as *mut u8,
            stride_x: src_addr.stride_x,
            stride_y: src_addr.stride_y,
        };
        let dst_plane = Plane {
            base: dst_ptr as *mut u8,
            stride_x: dst_addr.stride_x,
            stride_y: dst_addr.stride_y,
        };

        // Shrink the valid region by the window radius so that the whole
        // neighbourhood of every examined pixel stays inside it.
        let rect_start_x = src_rect.start_x as i32;
        let rect_start_y = src_rect.start_y as i32;
        let rect_width = (src_rect.end_x - src_rect.start_x) as i32;
        let rect_height = (src_rect.end_y - src_rect.start_y) as i32;
        let xs = (rect_start_x + border)..(rect_start_x + rect_width - border);
        let ys = (rect_start_y + border)..(rect_start_y + rect_height - border);

        let status = if src_format == VX_DF_IMAGE_U8 as vx_df_image {
            suppress::<u8>(src_plane, dst_plane, mask_plane, xs, ys, border, 0);
            VX_SUCCESS
        } else if src_format == VX_DF_IMAGE_S16 as vx_df_image {
            suppress::<i16>(src_plane, dst_plane, mask_plane, xs, ys, border, i16::MIN);
            VX_SUCCESS
        } else {
            VX_ERROR_INVALID_FORMAT
        };

        // Single cleanup path for success and for the unsupported-format case.
        vxUnmapImagePatch(output, dst_map_id);
        vxUnmapImagePatch(input, src_map_id);
        if mask_plane.is_some() {
            vxUnmapImagePatch(mask, mask_map_id);
        }
        status
    }
}

/// Returns the first four bytes of a scalar's backing storage, or `None` when
/// the handle is null or the scalar holds fewer than four bytes.
fn read_scalar_raw4(scalar: vx_scalar) -> Option<[u8; 4]> {
    if scalar.is_null() {
        return None;
    }
    // SAFETY: non-null was checked above; the handle is assumed to point at a
    // live `VxCScalarData`, as everywhere else in this file.
    let s = unsafe { &*(scalar as *const crate::c_api_data::VxCScalarData) };
    if s.data.len() >= 4 {
        Some([s.data[0], s.data[1], s.data[2], s.data[3]])
    } else {
        None
    }
}

/// Decodes the leading four bytes of `scalar` as an `f32`.
///
/// NOTE(review): decodes in native byte order, like the NonMaxSuppression
/// window-size reader; this is identical to the former little-endian decode
/// on little-endian hosts. Assumes scalar storage is written in host order;
/// confirm against scalar creation.
fn read_scalar_f32(scalar: vx_scalar) -> Option<f32> {
    read_scalar_raw4(scalar).map(f32::from_ne_bytes)
}

/// Decodes the leading four bytes of `scalar` as a `u32`.
///
/// Uses the same native-byte-order decoding as [`read_scalar_f32`].
fn read_scalar_u32(scalar: vx_scalar) -> Option<u32> {
    read_scalar_raw4(scalar).map(u32::from_ne_bytes)
}

/// Enhanced Vision: MatchTemplate +/// Compares a template image
against a source image and produces a comparison map. +/// Output dimensions: (src_w - tpl_w + 1) x (src_h - tpl_h + 1), S16 format. +pub fn vxu_match_template_impl(_ctx: vx_context, src: vx_image, templ: vx_image, matching_method_scalar: vx_scalar, output: vx_image) -> vx_status { + if src.is_null() || templ.is_null() || output.is_null() { + return VX_ERROR_INVALID_PARAMETERS; + } + + extern "C" { + fn vxQueryImage(image: vx_image, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxMapImagePatch(image: vx_image, rect: *const c_void, plane_index: u32, map_id: *mut vx_map_id, addr: *mut c_void, ptr: *mut *mut c_void, usage: vx_enum, mem_type: vx_enum, flags: u32) -> vx_status; + fn vxUnmapImagePatch(image: vx_image, map_id: vx_map_id) -> vx_status; + fn vxGetValidRegionImage(image: vx_image, rect: *mut c_void) -> vx_status; + } + + unsafe { + // Read matching method from scalar + let method = if !matching_method_scalar.is_null() { + let s = &*(matching_method_scalar as *const crate::c_api_data::VxCScalarData); + if s.data.len() >= 4 { + i32::from_le_bytes([s.data[0], s.data[1], s.data[2], s.data[3]]) + } else { + return VX_ERROR_INVALID_PARAMETERS; + } + } else { + return VX_ERROR_INVALID_PARAMETERS; + }; + + // Valid methods: VX_COMPARE_HAMMING=0x19000, VX_COMPARE_L1=0x19001, + // VX_COMPARE_L2=0x19002, VX_COMPARE_CCORR=0x19003, + // VX_COMPARE_L2_NORM=0x19004, VX_COMPARE_CCORR_NORM=0x19005 + let valid_methods = [0x19000i32, 0x19001, 0x19002, 0x19003, 0x19004, 0x19005]; + if !valid_methods.contains(&method) { + return VX_ERROR_INVALID_PARAMETERS; + } + + // Get source image info + let mut src_width: vx_uint32 = 0; + let mut src_height: vx_uint32 = 0; + let mut src_format: vx_df_image = 0; + let s1 = vxQueryImage(src, crate::c_api::VX_IMAGE_WIDTH, &mut src_width as *mut _ as *mut c_void, std::mem::size_of::()); + let s2 = vxQueryImage(src, crate::c_api::VX_IMAGE_HEIGHT, &mut src_height as *mut _ as *mut c_void, std::mem::size_of::()); + let s3 
= vxQueryImage(src, crate::c_api::VX_IMAGE_FORMAT, &mut src_format as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS || s2 != VX_SUCCESS || s3 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + // Get template image info + let mut tpl_width: vx_uint32 = 0; + let mut tpl_height: vx_uint32 = 0; + let mut tpl_format: vx_df_image = 0; + let t1 = vxQueryImage(templ, crate::c_api::VX_IMAGE_WIDTH, &mut tpl_width as *mut _ as *mut c_void, std::mem::size_of::()); + let t2 = vxQueryImage(templ, crate::c_api::VX_IMAGE_HEIGHT, &mut tpl_height as *mut _ as *mut c_void, std::mem::size_of::()); + let t3 = vxQueryImage(templ, crate::c_api::VX_IMAGE_FORMAT, &mut tpl_format as *mut _ as *mut c_void, std::mem::size_of::()); + if t1 != VX_SUCCESS || t2 != VX_SUCCESS || t3 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + // Both must be U8 + if src_format != VX_DF_IMAGE_U8 || tpl_format != VX_DF_IMAGE_U8 { + return VX_ERROR_INVALID_FORMAT; + } + + // Get output image info + let mut out_width: vx_uint32 = 0; + let mut out_height: vx_uint32 = 0; + let mut out_format: vx_df_image = 0; + let o1 = vxQueryImage(output, crate::c_api::VX_IMAGE_WIDTH, &mut out_width as *mut _ as *mut c_void, std::mem::size_of::()); + let o2 = vxQueryImage(output, crate::c_api::VX_IMAGE_HEIGHT, &mut out_height as *mut _ as *mut c_void, std::mem::size_of::()); + let o3 = vxQueryImage(output, crate::c_api::VX_IMAGE_FORMAT, &mut out_format as *mut _ as *mut c_void, std::mem::size_of::()); + if o1 != VX_SUCCESS || o2 != VX_SUCCESS || o3 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + // Output must be S16 + if out_format != VX_DF_IMAGE_S16 { + return VX_ERROR_INVALID_FORMAT; + } + + // Expected output dimensions + let expected_w = src_width.saturating_sub(tpl_width).saturating_add(1); + let expected_h = src_height.saturating_sub(tpl_height).saturating_add(1); + if out_width != expected_w || out_height != expected_h { + return VX_ERROR_INVALID_PARAMETERS; + 
} + + // Map source image + let src_rect = crate::c_api::vx_rectangle_t { + start_x: 0, + start_y: 0, + end_x: src_width, + end_y: src_height, + }; + let mut src_addr: crate::c_api::vx_imagepatch_addressing_t = std::mem::zeroed(); + let mut src_ptr: *mut c_void = std::ptr::null_mut(); + let mut src_map_id: vx_map_id = 0; + let s_map = vxMapImagePatch(src, &src_rect as *const _ as *const c_void, 0, &mut src_map_id, &mut src_addr as *mut _ as *mut c_void, &mut src_ptr as *mut *mut c_void, crate::c_api::VX_READ_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST, 0); + if s_map != VX_SUCCESS { + return s_map; + } + + // Map template image + let tpl_rect = crate::c_api::vx_rectangle_t { + start_x: 0, + start_y: 0, + end_x: tpl_width, + end_y: tpl_height, + }; + let mut tpl_addr: crate::c_api::vx_imagepatch_addressing_t = std::mem::zeroed(); + let mut tpl_ptr: *mut c_void = std::ptr::null_mut(); + let mut tpl_map_id: vx_map_id = 0; + let t_map = vxMapImagePatch(templ, &tpl_rect as *const _ as *const c_void, 0, &mut tpl_map_id, &mut tpl_addr as *mut _ as *mut c_void, &mut tpl_ptr as *mut *mut c_void, crate::c_api::VX_READ_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST, 0); + if t_map != VX_SUCCESS { + vxUnmapImagePatch(src, src_map_id); + return t_map; + } + + // Map output image + let out_rect = crate::c_api::vx_rectangle_t { + start_x: 0, + start_y: 0, + end_x: out_width, + end_y: out_height, + }; + let mut out_addr: crate::c_api::vx_imagepatch_addressing_t = std::mem::zeroed(); + let mut out_ptr: *mut c_void = std::ptr::null_mut(); + let mut out_map_id: vx_map_id = 0; + let o_map = vxMapImagePatch(output, &out_rect as *const _ as *const c_void, 0, &mut out_map_id, &mut out_addr as *mut _ as *mut c_void, &mut out_ptr as *mut *mut c_void, crate::c_api::VX_WRITE_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST, 0); + if o_map != VX_SUCCESS { + vxUnmapImagePatch(templ, tpl_map_id); + vxUnmapImagePatch(src, src_map_id); + return o_map; + } + + let sw = src_width as usize; + let sh = src_height as 
usize; + let tw = tpl_width as usize; + let th = tpl_height as usize; + let ow = out_width as usize; + let oh = out_height as usize; + let tpl_pixels = (tw * th) as f64; + + // Pre-compute template sum, sum of squares for normalized methods + let mut tpl_sum: f64 = 0.0; + let mut tpl_sum_sq: f64 = 0.0; + for ty in 0..th { + let tpl_row = (tpl_ptr as *mut u8).wrapping_add((ty as i32 * tpl_addr.stride_y) as usize); + for tx in 0..tw { + let tval = *tpl_row.add((tx as i32 * tpl_addr.stride_x) as usize) as f64; + tpl_sum += tval; + tpl_sum_sq += tval * tval; + } + } + + // Compute match template for each position + for oy in 0..oh { + let out_row = (out_ptr as *mut i16).wrapping_add((oy as i32 * out_addr.stride_y) as usize / std::mem::size_of::()); + for ox in 0..ow { + let sx = ox; + let sy = oy; + + let mut sum: f64 = 0.0; + let mut sum_sq: f64 = 0.0; + let mut sum_prod: f64 = 0.0; + + for ty in 0..th { + let src_row = (src_ptr as *mut u8).wrapping_add(((sy + ty) as i32 * src_addr.stride_y) as usize); + let tpl_row = (tpl_ptr as *mut u8).wrapping_add((ty as i32 * tpl_addr.stride_y) as usize); + for tx in 0..tw { + let sval = *src_row.add(((sx + tx) as i32 * src_addr.stride_x) as usize) as f64; + let tval = *tpl_row.add((tx as i32 * tpl_addr.stride_x) as usize) as f64; + + match method { + 0x19000 => { + // VX_COMPARE_HAMMING: XOR, average + sum += (sval as u8 ^ tval as u8) as f64; + } + 0x19001 => { + // VX_COMPARE_L1: average of absolute differences + sum += (sval - tval).abs(); + } + 0x19002 => { + // VX_COMPARE_L2: average of squared differences + sum += (sval - tval) * (sval - tval); + } + 0x19003 => { + // VX_COMPARE_CCORR: cross correlation + sum += sval * tval; + } + 0x19004 | 0x19005 => { + // VX_COMPARE_L2_NORM or VX_COMPARE_CCORR_NORM + sum_prod += sval * tval; + sum += sval; + sum_sq += sval * sval; + } + _ => {} + } + } + } + + let result = match method { + 0x19000 => { + // Hamming: average + let val = sum / tpl_pixels; + // Scale to S16 range, but keep 
reasonable values + (val * 256.0) as i16 + } + 0x19001 => { + // L1: average * 256 for S16 + let val = sum / tpl_pixels; + (val * 256.0) as i16 + } + 0x19002 => { + // L2: average * 256 for S16 + let val = sum / tpl_pixels; + (val * 256.0) as i16 + } + 0x19003 => { + // CCORR: sum * 2^15 / sqrt(tpl_sum_sq * img_sum_sq) + // For unnormalized, just use the raw sum, scaled + let val = sum / tpl_pixels; + // Scale so peak is around 32767 + let scale = 32767.0 / 255.0; + (val * scale) as i16 + } + 0x19004 => { + // VX_COMPARE_L2_NORM + // R = sum((T-I)^2) / sqrt(sum(T^2) * sum(I^2)) + let img_sum = sum; + let img_sum_sq = sum_sq; + let diff_sq = tpl_sum_sq + img_sum_sq - 2.0 * sum_prod; + let denom = (tpl_sum_sq * img_sum_sq).sqrt(); + let val = if denom > 0.0 { + diff_sq / denom + } else { + 0.0 + }; + (val * 32767.0) as i16 + } + 0x19005 => { + // VX_COMPARE_CCORR_NORM + // R = sum(T*I) * 2^15 / sqrt(sum(T^2) * sum(I^2)) + let denom = (tpl_sum_sq * sum_sq).sqrt(); + let val = if denom > 0.0 { + sum_prod * 32767.0 / denom + } else { + 0.0 + }; + val as i16 + } + _ => 0i16, + }; + + *out_row.add((ox as i32 * out_addr.stride_x) as usize / std::mem::size_of::()) = result; + } + } + + vxUnmapImagePatch(output, out_map_id); + vxUnmapImagePatch(templ, tpl_map_id); + vxUnmapImagePatch(src, src_map_id); + + VX_SUCCESS + } +} + +pub fn vxu_hough_lines_p_impl(_ctx: vx_context, input: vx_image, rho_scalar: vx_scalar, theta_scalar: vx_scalar, threshold_scalar: vx_scalar, line_length_scalar: vx_scalar, line_gap_scalar: vx_scalar, lines_array: vx_array) -> vx_status { + if input.is_null() || lines_array.is_null() { + return VX_ERROR_INVALID_PARAMETERS; + } + + unsafe { + // Read scalar params + let rho = read_scalar_f32(rho_scalar); + let theta = read_scalar_f32(theta_scalar); + let threshold = read_scalar_u32(threshold_scalar); + let min_line_length = read_scalar_u32(line_length_scalar); + let max_line_gap = read_scalar_u32(line_gap_scalar); + + if rho.is_none() || theta.is_none() 
|| threshold.is_none() || min_line_length.is_none() || max_line_gap.is_none() { + return VX_ERROR_INVALID_PARAMETERS; + } + let rho = rho.unwrap(); + let theta = theta.unwrap(); + let threshold = threshold.unwrap() as i32; + let min_line_length = min_line_length.unwrap() as i32; + let max_line_gap = max_line_gap.unwrap() as i32; + + if rho <= 0.0 || theta <= 0.0 || threshold <= 0 { + return VX_ERROR_INVALID_PARAMETERS; + } + + extern "C" { + fn vxQueryImage(image: vx_image, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxMapImagePatch(image: vx_image, rect: *const c_void, plane_index: u32, map_id: *mut vx_map_id, addr: *mut c_void, ptr: *mut *mut c_void, usage: vx_enum, mem_type: vx_enum, flags: u32) -> vx_status; + fn vxUnmapImagePatch(image: vx_image, map_id: vx_map_id) -> vx_status; + fn vxGetValidRegionImage(image: vx_image, rect: *mut c_void) -> vx_status; + fn vxTruncateArray(arr: vx_array, new_num_items: vx_size) -> vx_status; + fn vxAddArrayItems(arr: vx_array, count: vx_size, ptr: *const c_void, stride: vx_size) -> vx_status; + } + + let mut src_width: vx_uint32 = 0; + let mut src_height: vx_uint32 = 0; + let mut src_format: vx_df_image = 0; + let s1 = vxQueryImage(input, crate::c_api::VX_IMAGE_WIDTH, &mut src_width as *mut _ as *mut c_void, std::mem::size_of::()); + let s2 = vxQueryImage(input, crate::c_api::VX_IMAGE_HEIGHT, &mut src_height as *mut _ as *mut c_void, std::mem::size_of::()); + let s3 = vxQueryImage(input, crate::c_api::VX_IMAGE_FORMAT, &mut src_format as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS || s2 != VX_SUCCESS || s3 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + let mut src_rect: crate::c_api::vx_rectangle_t = std::mem::zeroed(); + let vr = vxGetValidRegionImage(input, &mut src_rect as *mut _ as *mut c_void); + if vr != VX_SUCCESS { + return vr; + } + + let rect = crate::c_api::vx_rectangle_t { + start_x: 0, + start_y: 0, + end_x: src_width, + end_y: src_height, + }; + 
+ let mut src_addr: crate::c_api::vx_imagepatch_addressing_t = std::mem::zeroed(); + let mut src_ptr: *mut c_void = std::ptr::null_mut(); + let mut src_map_id: vx_map_id = 0; + let s_map = vxMapImagePatch(input, &rect as *const _ as *const c_void, 0, &mut src_map_id, &mut src_addr as *mut _ as *mut c_void, &mut src_ptr as *mut *mut c_void, crate::c_api::VX_READ_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST, 0); + if s_map != VX_SUCCESS { + return s_map; + } + + let w = src_width as i32; + let h = src_height as i32; + + // Collect non-zero edge pixels + let mut points: Vec<(i32, i32)> = Vec::new(); + for y in src_rect.start_y as i32..src_rect.end_y as i32 { + for x in src_rect.start_x as i32..src_rect.end_x as i32 { + let val = if src_format == 0x31303055u32 as vx_df_image { + let row = (src_ptr as *mut u8).wrapping_add((y as i32 * src_addr.stride_y) as usize); + let byte = *row.add((x as i32 * src_addr.stride_x) as usize / 8); + (byte >> (x % 8)) & 1 + } else { + let row = (src_ptr as *mut u8).wrapping_add((y as i32 * src_addr.stride_y) as usize); + *row.add((x as i32 * src_addr.stride_x) as usize) + }; + if val != 0 { + points.push((x, y)); + } + } + } + vxUnmapImagePatch(input, src_map_id); + + if points.is_empty() { + let _ = vxTruncateArray(lines_array, 0); + return VX_SUCCESS; + } + + // HoughLinesP probabilistic algorithm + let max_rho = ((w * w + h * h) as f64).sqrt(); + let num_rho = (max_rho / rho as f64).ceil() as i32 * 2 + 1; + let num_theta = (std::f64::consts::PI / theta as f64).ceil() as i32 + 1; + + let mut accumulator: Vec> = vec![vec![0; num_theta as usize]; num_rho as usize]; + + // Build accumulator + for &(x, y) in &points { + for t in 0..num_theta { + let angle = t as f64 * theta as f64; + let r = (x as f64 * angle.cos() + y as f64 * angle.sin()) / rho as f64; + let r_idx = (r + num_rho as f64 / 2.0) as i32; + if r_idx >= 0 && r_idx < num_rho { + accumulator[r_idx as usize][t as usize] += 1; + } + } + } + + // Find lines using probabilistic approach 
+ let mut lines: Vec<(f32, f32, f32, f32)> = Vec::new(); + let mut used: std::collections::HashSet<(i32, i32)> = std::collections::HashSet::new(); + + // Sort points randomly-ish for probabilistic behavior + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + let mut rng_state = 42u64; + let mut rng = || { + rng_state = rng_state.wrapping_mul(6364136223846793005).wrapping_add(1); + rng_state + }; + + // For each point not yet used, trace a line + for &(x, y) in &points { + if used.contains(&(x, y)) { + continue; + } + + // Find best angle for this point + let mut best_theta = 0; + let mut best_votes = 0; + for t in 0..num_theta { + let angle = t as f64 * theta as f64; + let r = (x as f64 * angle.cos() + y as f64 * angle.sin()) / rho as f64; + let r_idx = (r + num_rho as f64 / 2.0) as i32; + if r_idx >= 0 && r_idx < num_rho { + let votes = accumulator[r_idx as usize][t as usize]; + if votes > best_votes { + best_votes = votes; + best_theta = t; + } + } + } + + if best_votes < threshold { + continue; + } + + let angle = best_theta as f64 * theta as f64; + // Line direction is perpendicular to the normal (cos θ, sin θ) + let dx = -angle.sin(); + let dy = angle.cos(); + + // Trace forward and backward from (x, y) + let mut line_points_forward: Vec<(i32, i32)> = Vec::new(); + let mut line_points_backward: Vec<(i32, i32)> = Vec::new(); + + // Forward direction + let mut cx = x as f64; + let mut cy = y as f64; + for step in 0..(w.max(h) * 2) { + let nx = (cx + dx) as i32; + let ny = (cy + dy) as i32; + if nx < 0 || nx >= w || ny < 0 || ny >= h { + break; + } + // Check if pixel is edge + let is_edge = points.iter().any(|&(px, py)| px == nx && py == ny); + if is_edge { + line_points_forward.push((nx, ny)); + cx += dx; + cy += dy; + } else { + // Gap check + let gap_count = (1..=max_line_gap).find(|&g| { + let gx = (cx + dx * (g + 1) as f64) as i32; + let gy = (cy + dy * (g + 1) as f64) as i32; + gx >= 0 && gx < w && gy >= 0 && gy < h && 
points.iter().any(|&(px, py)| px == gx && py == gy) + }); + if let Some(gap) = gap_count { + cx += dx * (gap + 1) as f64; + cy += dy * (gap + 1) as f64; + let nx = cx as i32; + let ny = cy as i32; + if nx >= 0 && nx < w && ny >= 0 && ny < h { + line_points_forward.push((nx, ny)); + } + } else { + break; + } + } + } + + // Backward direction + let mut cx = x as f64; + let mut cy = y as f64; + for step in 0..(w.max(h) * 2) { + let nx = (cx - dx) as i32; + let ny = (cy - dy) as i32; + if nx < 0 || nx >= w || ny < 0 || ny >= h { + break; + } + let is_edge = points.iter().any(|&(px, py)| px == nx && py == ny); + if is_edge { + line_points_backward.push((nx, ny)); + cx -= dx; + cy -= dy; + } else { + let gap_count = (1..=max_line_gap).find(|&g| { + let gx = (cx - dx * (g + 1) as f64) as i32; + let gy = (cy - dy * (g + 1) as f64) as i32; + gx >= 0 && gx < w && gy >= 0 && gy < h && points.iter().any(|&(px, py)| px == gx && py == gy) + }); + if let Some(gap) = gap_count { + cx -= dx * (gap + 1) as f64; + cy -= dy * (gap + 1) as f64; + let nx = cx as i32; + let ny = cy as i32; + if nx >= 0 && nx < w && ny >= 0 && ny < h { + line_points_backward.push((nx, ny)); + } + } else { + break; + } + } + } + + // Combine points + let mut all_points = line_points_backward.clone(); + all_points.reverse(); + all_points.push((x, y)); + all_points.extend(line_points_forward); + + if all_points.len() < min_line_length as usize { + continue; + } + + // Mark points as used + for &(px, py) in &all_points { + used.insert((px, py)); + } + + // Create line segment from first to last point + if let (Some(&(sx, sy)), Some(&(ex, ey))) = (all_points.first(), all_points.last()) { + lines.push((sx as f32, sy as f32, ex as f32, ey as f32)); + } + } + + // Truncate array and add lines + let status = vxTruncateArray(lines_array, 0); + if status != VX_SUCCESS { + return status; + } + + #[repr(C)] + struct VxLine2d { + start_x: f32, + start_y: f32, + end_x: f32, + end_y: f32, + } + + let line_items: Vec = 
lines.iter().map(|(sx, sy, ex, ey)| VxLine2d { + start_x: *sx, + start_y: *sy, + end_x: *ex, + end_y: *ey, + }).collect(); + + if !line_items.is_empty() { + let add_status = vxAddArrayItems(lines_array, line_items.len(), line_items.as_ptr() as *const c_void, std::mem::size_of::()); + if add_status != VX_SUCCESS { + return add_status; + } + } + + VX_SUCCESS + } +} + + /// Pixel-wise multiplication with scale, overflow and rounding policies /// overflow_policy: 0 = WRAP, 1 = SATURATE /// rounding_policy: 1 = TO_ZERO, 2 = TO_NEAREST_EVEN @@ -9198,3 +10503,272 @@ pub fn vxu_half_scale_gaussian_impl( copy_rust_to_c_image(&dst, output) } } + +/// OpenVX LBP format enum constants +const VX_LBP: vx_enum = 0x18000; +const VX_MLBP: vx_enum = 0x18001; +const VX_ULBP: vx_enum = 0x18002; + +fn read_scalar_enum_from_scalar(scalar: vx_scalar) -> Option { + if scalar.is_null() { + return None; + } + unsafe { + let s = &*(scalar as *const crate::c_api_data::VxCScalarData); + if s.data.len() >= 4 { + Some(i32::from_le_bytes([s.data[0], s.data[1], s.data[2], s.data[3]])) + } else if s.data.len() >= 2 { + Some(i16::from_le_bytes([s.data[0], s.data[1]]) as i32) + } else if s.data.len() >= 1 { + Some(s.data[0] as i32) + } else { + None + } + } +} + +fn read_scalar_i8(scalar: vx_scalar) -> Option { + if scalar.is_null() { + return None; + } + unsafe { + let s = &*(scalar as *const crate::c_api_data::VxCScalarData); + if !s.data.is_empty() { + Some(s.data[0] as i8) + } else { + None + } + } +} + +/// Count bit transitions in an 8-bit circular pattern +fn lbp_uniform_value(pattern: u8) -> u8 { + let mut transitions = 0u8; + let mut prev_bit = (pattern >> 7) & 1; + for i in 0..8 { + let bit = (pattern >> i) & 1; + if bit != prev_bit { + transitions += 1; + } + prev_bit = bit; + } + if transitions <= 2 { + pattern + } else { + 9 // non-uniform pattern value + } +} + +/// Enhanced Vision: LBP (Local Binary Patterns) +pub fn vxu_lbp_impl(_ctx: vx_context, input: vx_image, format_scalar: 
vx_scalar, kernel_size_scalar: vx_scalar, output: vx_image) -> vx_status { + if input.is_null() || output.is_null() { + return VX_ERROR_INVALID_PARAMETERS; + } + + let format = read_scalar_enum_from_scalar(format_scalar).unwrap_or(VX_LBP); + let kernel_size = read_scalar_i8(kernel_size_scalar).unwrap_or(3); + + if kernel_size != 3 && kernel_size != 5 { + return VX_ERROR_INVALID_PARAMETERS; + } + + extern "C" { + fn vxQueryImage(image: vx_image, attribute: vx_enum, ptr: *mut c_void, size: vx_size) -> vx_status; + fn vxMapImagePatch(image: vx_image, rect: *const c_void, plane_index: u32, map_id: *mut vx_map_id, addr: *mut c_void, ptr: *mut *mut c_void, usage: vx_enum, mem_type: vx_enum, flags: u32) -> vx_status; + fn vxUnmapImagePatch(image: vx_image, map_id: vx_map_id) -> vx_status; + fn vxGetValidRegionImage(image: vx_image, rect: *mut c_void) -> vx_status; + } + + unsafe { + let mut src_width: vx_uint32 = 0; + let mut src_height: vx_uint32 = 0; + let mut src_format: vx_df_image = 0; + let s1 = vxQueryImage(input, crate::c_api::VX_IMAGE_WIDTH, &mut src_width as *mut _ as *mut c_void, std::mem::size_of::()); + let s2 = vxQueryImage(input, crate::c_api::VX_IMAGE_HEIGHT, &mut src_height as *mut _ as *mut c_void, std::mem::size_of::()); + let s3 = vxQueryImage(input, crate::c_api::VX_IMAGE_FORMAT, &mut src_format as *mut _ as *mut c_void, std::mem::size_of::()); + if s1 != VX_SUCCESS || s2 != VX_SUCCESS || s3 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + let mut dst_width: vx_uint32 = 0; + let mut dst_height: vx_uint32 = 0; + let mut dst_format: vx_df_image = 0; + let d1 = vxQueryImage(output, crate::c_api::VX_IMAGE_WIDTH, &mut dst_width as *mut _ as *mut c_void, std::mem::size_of::()); + let d2 = vxQueryImage(output, crate::c_api::VX_IMAGE_HEIGHT, &mut dst_height as *mut _ as *mut c_void, std::mem::size_of::()); + let d3 = vxQueryImage(output, crate::c_api::VX_IMAGE_FORMAT, &mut dst_format as *mut _ as *mut c_void, std::mem::size_of::()); + if d1 != 
VX_SUCCESS || d2 != VX_SUCCESS || d3 != VX_SUCCESS { + return VX_ERROR_INVALID_PARAMETERS; + } + + if src_format != VX_DF_IMAGE_U8 as vx_df_image || dst_format != VX_DF_IMAGE_U8 as vx_df_image { + return VX_ERROR_INVALID_FORMAT; + } + if src_width != dst_width || src_height != dst_height { + return VX_ERROR_INVALID_PARAMETERS; + } + + let mut src_rect: crate::c_api::vx_rectangle_t = std::mem::zeroed(); + let vr = vxGetValidRegionImage(input, &mut src_rect as *mut _ as *mut c_void); + if vr != VX_SUCCESS { + return vr; + } + + let rect = crate::c_api::vx_rectangle_t { + start_x: 0, + start_y: 0, + end_x: src_width, + end_y: src_height, + }; + + let mut src_addr: crate::c_api::vx_imagepatch_addressing_t = std::mem::zeroed(); + let mut dst_addr: crate::c_api::vx_imagepatch_addressing_t = std::mem::zeroed(); + let mut src_ptr: *mut c_void = std::ptr::null_mut(); + let mut dst_ptr: *mut c_void = std::ptr::null_mut(); + let mut src_map_id: vx_map_id = 0; + let mut dst_map_id: vx_map_id = 0; + + let s_map = vxMapImagePatch(input, &rect as *const _ as *const c_void, 0, &mut src_map_id, &mut src_addr as *mut _ as *mut c_void, &mut src_ptr as *mut *mut c_void, crate::c_api::VX_READ_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST, 0); + if s_map != VX_SUCCESS { + return s_map; + } + + let d_map = vxMapImagePatch(output, &rect as *const _ as *const c_void, 0, &mut dst_map_id, &mut dst_addr as *mut _ as *mut c_void, &mut dst_ptr as *mut *mut c_void, crate::c_api::VX_WRITE_ONLY, crate::c_api::VX_MEMORY_TYPE_HOST, 0); + if d_map != VX_SUCCESS { + vxUnmapImagePatch(input, src_map_id); + return d_map; + } + + let width = src_width as i32; + let height = src_height as i32; + let border = (kernel_size / 2) as i32; + + // Sampling offsets matching the OpenVX reference implementation (c_model/c_lbp.c) + // In the reference, (x, y) are in C-order where y increases downward. + // g[0..7] are the 8 neighbor values in counter-clockwise order from top-left. 
+ // + // For 3x3 (ksize=3): + // g[0]=src[x-1,y-1], g[1]=src[x,y-1], g[2]=src[x+1,y-1], + // g[3]=src[x+1,y], g[4]=src[x+1,y+1], g[5]=src[x,y+1], + // g[6]=src[x-1,y+1], g[7]=src[x-1,y] + // + // For 5x5 Standard/Uniform (ksize=5): + // g[0]=src[x-1,y-1], g[1]=src[x,y-2], g[2]=src[x+1,y-1], + // g[3]=src[x+2,y], g[4]=src[x+1,y+1], g[5]=src[x,y+2], + // g[6]=src[x-1,y+1], g[7]=src[x-2,y] + // + // For 5x5 Modified (ksize=5) - MLBP only supports ksize=5: + // g[0]=src[x-2,y-2], g[1]=src[x,y-2], g[2]=src[x+2,y-2], + // g[3]=src[x+2,y], g[4]=src[x+2,y+2], g[5]=src[x,y+2], + // g[6]=src[x-2,y+2], g[7]=src[x-2,y] + // + // We store offsets as (dy, dx) pairs. + let (offsets_std, offsets_mlbp) = if kernel_size == 3 { + ( + [ + (-1, -1), (-1, 0), (-1, 1), + (0, 1), (1, 1), (1, 0), + (1, -1), (0, -1), + ], + None, // MLBP not supported for 3x3 + ) + } else { + ( + [ + (-1, -1), (-2, 0), (-1, 1), + (0, 2), (1, 1), (2, 0), + (1, -1), (0, -2), + ], + Some([ + (-2, -2), (-2, 0), (-2, 2), + (0, 2), (2, 2), (2, 0), + (2, -2), (0, -2), + ]), + ) + }; + + if format == VX_MLBP && offsets_mlbp.is_none() { + return VX_ERROR_INVALID_PARAMETERS; + } + + // Only process interior pixels (same as reference) + let y_start = border; + let y_end = height - border; + let x_start = border; + let x_end = width - border; + + for y in 0..height { + for x in 0..width { + let mut output_val: u8 = 0; + + if y >= y_start && y < y_end && x >= x_start && x < x_end { + let src_row = (src_ptr as *mut u8).wrapping_add((y * src_addr.stride_y) as usize); + let center_val = *src_row.add((x * src_addr.stride_x) as usize); + + let mut pattern: u8 = 0; + + if format == VX_MLBP { + // Modified LBP: compare each neighbor against the average of all 8 neighbors + // Reference: avg = (g[0]+g[1]+g[2]+g[3]+g[4]+g[5]+g[6]+g[7]+1)/8 + // sum += ((g[p] > avg) * (1 << p)); + let mlbp_offsets = offsets_mlbp.unwrap(); + let mut g: [u8; 8] = [0; 8]; + for (i, (dy, dx)) in mlbp_offsets.iter().enumerate() { + let ny = y + dy; + 
let nx = x + dx; + let row = (src_ptr as *mut u8).wrapping_add((ny * src_addr.stride_y) as usize); + g[i] = *row.add((nx * src_addr.stride_x) as usize); + } + let avg = ((g[0] as i32 + g[1] as i32 + g[2] as i32 + g[3] as i32 + g[4] as i32 + g[5] as i32 + g[6] as i32 + g[7] as i32 + 1) / 8) as u8; + for p in 0..8 { + if g[p] > avg { + pattern |= 1 << p; + } + } + output_val = pattern; + } else { + // Standard LBP or Uniform LBP + let offsets = offsets_std; + let mut g: [u8; 8] = [0; 8]; + for (i, (dy, dx)) in offsets.iter().enumerate() { + let ny = y + dy; + let nx = x + dx; + let row = (src_ptr as *mut u8).wrapping_add((ny * src_addr.stride_y) as usize); + g[i] = *row.add((nx * src_addr.stride_x) as usize); + } + + for p in 0..8 { + if g[p] >= center_val { + pattern |= 1 << p; + } + } + + if format == VX_ULBP { + // Count transitions (same as reference c_lbp.c) + let mut transitions = 0u8; + let mut prev_bit = if g[7] >= center_val { 1 } else { 0 }; + for p in 0..8 { + let bit = if g[p] >= center_val { 1 } else { 0 }; + transitions += (bit != prev_bit) as u8; + prev_bit = bit; + } + if transitions <= 2 { + output_val = pattern; + } else { + output_val = 9; + } + } else { + output_val = pattern; + } + } + } + + let dst_row = (dst_ptr as *mut u8).wrapping_add((y * dst_addr.stride_y) as usize); + *dst_row.add((x * dst_addr.stride_x) as usize) = output_val; + } + } + + vxUnmapImagePatch(output, dst_map_id); + vxUnmapImagePatch(input, src_map_id); + VX_SUCCESS + } +}