Skip to content

Commit 2ca16ac

Browse files
committed
fix issue #24: realistic Remap patterns and timing memory fences
1 parent ef776ff commit 2ca16ac

14 files changed

Lines changed: 359 additions & 69 deletions

CHANGELOG.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,33 @@ regressions are caught before the slower Phase 2 comparison job runs.
9999
Use `--warn-only` for a non-failing audit or `--allow-feature-set` to
100100
limit the checked scope.
101101

102+
### Changed — Remap benchmark now uses a realistic lens-distortion pattern
103+
104+
Issue #24 identified that the Remap benchmark used an identity remap
105+
(`dst(x,y) -> src(x,y)`) which yields perfectly sequential, cache-friendly
106+
memory access. Real-world remaps (lens distortion, fisheye correction,
107+
undistort) have scattered access patterns, and some implementations may
108+
detect and short-circuit identity remaps, inflating reported throughput.
109+
110+
`TestDataGenerator::createRemap()` and `OpenCVTestData::makeRemap()` now
111+
default to a deterministic radial lens-distortion model. Two additional
112+
patterns are selectable via `--remap-pattern`:
113+
114+
- `identity` — restores the old sequential/identity behaviour
115+
- `lens_distortion` — default; radial barrel distortion
116+
- `random_offsets` — small per-pixel jitter that breaks any identity fast path
117+
118+
The Remap and Remap_Nearest benchmarks use the configured pattern, while
119+
their correctness `verify_fn`s continue to use a stable identity remap so
120+
reference checks remain deterministic. JSON/Markdown reports now record
121+
`remap_pattern` in the config section for reproducibility.
122+
123+
### Fixed — timing calls now include full memory fences
124+
125+
To reduce compiler/CPU instruction-reordering effects around the measured
126+
kernel call, `BenchmarkTimer::start()` and `stop()` now issue
127+
`std::atomic_thread_fence(std::memory_order_seq_cst)` before reading and
128+
after writing the high-resolution clock sample.
102129

103130
### Fixed — opencv-mark duplicate `OpticalFlowPyrLK` benchmark name
104131

include/benchmark_config.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,13 @@ struct BenchmarkConfig {
9696
// perf" numbers but no longer a fair per-kernel cross-impl compare).
9797
int threads = 1;
9898

99+
// Remap coordinate generation pattern. The default is a radial
100+
// lens-distortion model because an identity remap is unrepresentative
101+
// of real-world fisheye/undistort workloads and may trigger impl-specific
102+
// fast paths. Use "identity" only when you explicitly want the old
103+
// cache-friendly behaviour.
104+
std::string remap_pattern = "lens_distortion"; // identity | lens_distortion | random_offsets
105+
99106
// Timer self-test — runs the validation harness instead of any
100107
// benchmark. Exit code reflects PASS/FAIL of the timer audit.
101108
bool validate_timing = false;

include/benchmark_runner.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ class BenchmarkRunner {
8080
std::vector<BenchmarkCase> registerPixelwiseBenchmarks();
8181
std::vector<BenchmarkCase> registerFilterBenchmarks();
8282
std::vector<BenchmarkCase> registerColorBenchmarks();
83-
std::vector<BenchmarkCase> registerGeometricBenchmarks();
83+
std::vector<BenchmarkCase> registerGeometricBenchmarks(const BenchmarkConfig& config);
8484
std::vector<BenchmarkCase> registerStatisticalBenchmarks();
8585
std::vector<BenchmarkCase> registerMultiscaleBenchmarks();
8686
std::vector<BenchmarkCase> registerFeatureBenchmarks();

include/test_data_generator.h

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,28 @@
55
#include <cstdint>
66
#include <random>
77

8+
namespace benchmark {
9+
10+
// Remap coordinate generation patterns. The default benchmark pattern is
11+
// LENS_DISTORTION because an identity remap produces perfectly sequential
12+
// memory access that is unrepresentative of real-world lens correction,
13+
// fisheye undistortion, or general warping workloads.
14+
enum class RemapPattern {
15+
IDENTITY, // (x,y) -> (x,y): sequential, cache-friendly, not realistic
16+
LENS_DISTORTION, // radial distortion model: realistic scattered access
17+
RANDOM_OFFSETS // small random jitter around each pixel
18+
};
19+
20+
// Parse a user-supplied remap pattern string. Returns LENS_DISTORTION for
21+
// unknown values so benchmarks always run with a realistic default.
22+
inline RemapPattern remapPatternFromString(const std::string& s) {
23+
if (s == "identity") return RemapPattern::IDENTITY;
24+
if (s == "random_offsets") return RemapPattern::RANDOM_OFFSETS;
25+
return RemapPattern::LENS_DISTORTION;
26+
}
27+
28+
} // namespace benchmark
29+
830
class TestDataGenerator {
931
public:
1032
explicit TestDataGenerator(uint64_t seed = 42);
@@ -25,7 +47,10 @@ class TestDataGenerator {
2547
vx_matrix createAffineMatrix(vx_context ctx);
2648
vx_matrix createPerspectiveMatrix(vx_context ctx);
2749
vx_remap createRemap(vx_context ctx, uint32_t src_w, uint32_t src_h,
28-
uint32_t dst_w, uint32_t dst_h);
50+
uint32_t dst_w, uint32_t dst_h,
51+
benchmark::RemapPattern pattern = benchmark::RemapPattern::LENS_DISTORTION);
52+
vx_remap createRemapIdentity(vx_context ctx, uint32_t src_w, uint32_t src_h,
53+
uint32_t dst_w, uint32_t dst_h);
2954
vx_convolution createConvolution3x3(vx_context ctx);
3055
vx_lut createLUT(vx_context ctx);
3156
vx_distribution createDistribution(vx_context ctx, vx_size num_bins,
@@ -40,6 +65,7 @@ class TestDataGenerator {
4065

4166
private:
4267
std::mt19937_64 rng_;
68+
uint64_t seed_;
4369
};
4470

4571
#endif // TEST_DATA_GENERATOR_H

opencv-mark/include/opencv_runner.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ class OpenCVRunner {
101101
// `registerXxxBenchmarks()` factories.
102102
std::vector<OpenCVBenchmarkCase> registerCvFilterBenchmarks();
103103
std::vector<OpenCVBenchmarkCase> registerCvColorBenchmarks();
104-
std::vector<OpenCVBenchmarkCase> registerCvGeometricBenchmarks();
104+
std::vector<OpenCVBenchmarkCase> registerCvGeometricBenchmarks(const BenchmarkConfig& config);
105105
std::vector<OpenCVBenchmarkCase> registerCvPixelwiseBenchmarks();
106106
std::vector<OpenCVBenchmarkCase> registerCvStatisticalBenchmarks();
107107
std::vector<OpenCVBenchmarkCase> registerCvMiscBenchmarks();

opencv-mark/include/opencv_test_data.h

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,26 @@
1111
#include <cstdint>
1212
#include <opencv2/core.hpp>
1313
#include <random>
14+
#include <string>
1415

1516
namespace opencv_mark {
1617

18+
// Coordinate generation patterns for cv::remap. Keep in sync with
19+
// openvx-mark's benchmark::RemapPattern.
20+
enum class RemapPattern {
21+
IDENTITY, // (x,y) -> (x,y): sequential, not representative
22+
LENS_DISTORTION, // radial distortion model (default for benchmarks)
23+
RANDOM_OFFSETS // small random jitter around each pixel
24+
};
25+
26+
// Parse a user-supplied remap pattern string. Returns LENS_DISTORTION for
27+
// unknown values so benchmarks always run with a realistic default.
28+
inline RemapPattern remapPatternFromString(const std::string& s) {
29+
if (s == "identity") return RemapPattern::IDENTITY;
30+
if (s == "random_offsets") return RemapPattern::RANDOM_OFFSETS;
31+
return RemapPattern::LENS_DISTORTION;
32+
}
33+
1734
class OpenCVTestData {
1835
public:
1936
explicit OpenCVTestData(uint64_t seed = 42);
@@ -42,11 +59,18 @@ class OpenCVTestData {
4259
cv::Mat makePerspectiveMatrix();
4360

4461
// Two CV_32FC1 maps (mapX, mapY) for cv::remap, sized
45-
// (dst_w, dst_h), each pixel mapped to a slightly displaced source
46-
// location so the kernel does real bilinear sampling work.
62+
// (dst_w, dst_h). Default pattern is LENS_DISTORTION so the
63+
// benchmark exercises scattered, realistic memory access; pass
64+
// IDENTITY to reproduce the old cache-friendly behaviour.
4765
void makeRemap(uint32_t src_w, uint32_t src_h,
4866
uint32_t dst_w, uint32_t dst_h,
49-
cv::Mat& mapX, cv::Mat& mapY);
67+
cv::Mat& mapX, cv::Mat& mapY,
68+
RemapPattern pattern = RemapPattern::LENS_DISTORTION);
69+
70+
// Identity remap maps for deterministic correctness checks.
71+
static void makeRemapIdentity(uint32_t src_w, uint32_t src_h,
72+
uint32_t dst_w, uint32_t dst_h,
73+
cv::Mat& mapX, cv::Mat& mapY);
5074

5175
// 3x3 CV_16SC1 convolution kernel for cv::filter2D — same
5276
// signed-int weights openvx-mark uses for its CustomConvolution
@@ -60,6 +84,7 @@ class OpenCVTestData {
6084

6185
private:
6286
std::mt19937_64 rng_;
87+
uint64_t seed_;
6388
};
6489

6590
} // namespace opencv_mark

opencv-mark/src/benchmarks/cv_geometric.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@
3232

3333
namespace opencv_mark {
3434

35-
std::vector<OpenCVBenchmarkCase> registerCvGeometricBenchmarks() {
35+
std::vector<OpenCVBenchmarkCase> registerCvGeometricBenchmarks(const BenchmarkConfig& config) {
3636
std::vector<OpenCVBenchmarkCase> cases;
37+
const RemapPattern remap_pattern = remapPatternFromString(config.remap_pattern);
3738

3839
{
3940
OpenCVBenchmarkCase bc;
@@ -151,21 +152,26 @@ std::vector<OpenCVBenchmarkCase> registerCvGeometricBenchmarks() {
151152
cases.push_back(bc);
152153
}
153154

154-
// Remap — U8 in, U8 out, 32FC1 mapX + 32FC1 mapY
155+
// Remap — U8 in, U8 out, 32FC1 mapX + CV_32FC1 mapY
155156
//
156157
// Note: cv::remap uses a separate output for the mapY (we reuse
157158
// CaseBuffers.output_extra to hold mapY since input_extra is
158159
// already taken by mapX). The actual output image lives in
159160
// CaseBuffers.output.
161+
//
162+
// The map coordinates default to a radial lens-distortion model
163+
// (LENS_DISTORTION) so the benchmark exercises scattered, realistic
164+
// memory access. Use --remap-pattern identity to restore the old
165+
// cache-friendly behaviour.
160166
{
161167
OpenCVBenchmarkCase bc;
162168
bc.name = "Remap";
163169
bc.category = "geometric";
164170
bc.feature_set = "vision";
165-
bc.setup_fn = [](uint32_t w, uint32_t h, OpenCVTestData& gen, CaseBuffers& bufs) -> bool {
171+
bc.setup_fn = [remap_pattern](uint32_t w, uint32_t h, OpenCVTestData& gen, CaseBuffers& bufs) -> bool {
166172
bufs.input = gen.makeU8(w, h);
167173
bufs.output.create(static_cast<int>(h), static_cast<int>(w), CV_8UC1);
168-
gen.makeRemap(w, h, w, h, bufs.input_extra, bufs.output_extra);
174+
gen.makeRemap(w, h, w, h, bufs.input_extra, bufs.output_extra, remap_pattern);
169175
return true;
170176
};
171177
bc.run_fn = [](CaseBuffers& bufs) {
@@ -304,16 +310,17 @@ std::vector<OpenCVBenchmarkCase> registerCvGeometricBenchmarks() {
304310
cases.push_back(bc);
305311
}
306312

307-
// Remap_Nearest — INTER_NEAREST
313+
// Remap_Nearest — INTER_NEAREST. Uses the same coordinate pattern
314+
// as Remap so both variants exercise realistic memory access.
308315
{
309316
OpenCVBenchmarkCase bc;
310317
bc.name = "Remap_Nearest";
311318
bc.category = "geometric";
312319
bc.feature_set = "vision";
313-
bc.setup_fn = [](uint32_t w, uint32_t h, OpenCVTestData& gen, CaseBuffers& bufs) -> bool {
320+
bc.setup_fn = [remap_pattern](uint32_t w, uint32_t h, OpenCVTestData& gen, CaseBuffers& bufs) -> bool {
314321
bufs.input = gen.makeU8(w, h);
315322
bufs.output.create(static_cast<int>(h), static_cast<int>(w), CV_8UC1);
316-
gen.makeRemap(w, h, w, h, bufs.input_extra, bufs.output_extra);
323+
gen.makeRemap(w, h, w, h, bufs.input_extra, bufs.output_extra, remap_pattern);
317324
return true;
318325
};
319326
bc.run_fn = [](CaseBuffers& bufs) {

opencv-mark/src/main.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,9 @@ void printUsage(const char* prog) {
6060
printf(" --stability-threshold N CV%% threshold (default: 15)\n");
6161
printf(" --max-retries N Max retries for unstable benchmarks (default: 1)\n");
6262
printf(" --no-outlier-removal Use raw samples for headline mean/median/stddev/CV\n");
63-
printf(" --include-unstable-in-scores Include high-CV results in composite scores\n\n");
63+
printf(" --include-unstable-in-scores Include high-CV results in composite scores\n");
64+
printf(" --remap-pattern PATTERN Remap coordinate pattern: identity,\n");
65+
printf(" lens_distortion (default), random_offsets\n\n");
6466

6567
printf("Output:\n");
6668
printf(" --output-dir DIR Output directory (default: ./benchmark_results)\n");
@@ -190,6 +192,15 @@ bool parseArgs(int argc, char* argv[], BenchmarkConfig& config) {
190192
config.remove_outliers = false;
191193
} else if (arg == "--include-unstable-in-scores") {
192194
config.exclude_unstable_from_scores = false;
195+
} else if (arg == "--remap-pattern" && i + 1 < argc) {
196+
config.remap_pattern = argv[++i];
197+
if (config.remap_pattern != "identity" &&
198+
config.remap_pattern != "lens_distortion" &&
199+
config.remap_pattern != "random_offsets") {
200+
printf("WARNING: unknown remap pattern '%s', using lens_distortion\n",
201+
config.remap_pattern.c_str());
202+
config.remap_pattern = "lens_distortion";
203+
}
193204
} else if (arg == "--compare" && i + 1 < argc) {
194205
config.compare_files = splitComma(argv[++i]);
195206
} else if (arg == "--threads" && i + 1 < argc) {
@@ -332,7 +343,7 @@ int main(int argc, char* argv[]) {
332343
opencv_mark::OpenCVRunner runner(config);
333344
runner.addCases(opencv_mark::registerCvFilterBenchmarks());
334345
runner.addCases(opencv_mark::registerCvColorBenchmarks());
335-
runner.addCases(opencv_mark::registerCvGeometricBenchmarks());
346+
runner.addCases(opencv_mark::registerCvGeometricBenchmarks(config));
336347
runner.addCases(opencv_mark::registerCvPixelwiseBenchmarks());
337348
runner.addCases(opencv_mark::registerCvStatisticalBenchmarks());
338349
runner.addCases(opencv_mark::registerCvMiscBenchmarks());

opencv-mark/src/opencv_test_data.cpp

Lines changed: 83 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44

55
namespace opencv_mark {
66

7-
OpenCVTestData::OpenCVTestData(uint64_t seed) : rng_(seed) {}
7+
OpenCVTestData::OpenCVTestData(uint64_t seed) : rng_(seed), seed_(seed) {}
88

9-
void OpenCVTestData::reseed(uint64_t seed) { rng_.seed(seed); }
9+
void OpenCVTestData::reseed(uint64_t seed) { rng_.seed(seed); seed_ = seed; }
1010

1111
cv::Mat OpenCVTestData::makeU8(uint32_t width, uint32_t height) {
1212
cv::Mat m(static_cast<int>(height), static_cast<int>(width), CV_8UC1);
@@ -64,12 +64,9 @@ cv::Mat OpenCVTestData::makePerspectiveMatrix() {
6464
return m;
6565
}
6666

67-
void OpenCVTestData::makeRemap(uint32_t src_w, uint32_t src_h,
68-
uint32_t dst_w, uint32_t dst_h,
69-
cv::Mat& mapX, cv::Mat& mapY) {
70-
// Identity map with a tiny per-pixel offset so the kernel does
71-
// real bilinear sampling instead of degenerate copy. Mirrors
72-
// openvx-mark's TestDataGenerator::createRemap behaviour.
67+
void OpenCVTestData::makeRemapIdentity(uint32_t src_w, uint32_t src_h,
68+
uint32_t dst_w, uint32_t dst_h,
69+
cv::Mat& mapX, cv::Mat& mapY) {
7370
mapX.create(static_cast<int>(dst_h), static_cast<int>(dst_w), CV_32FC1);
7471
mapY.create(static_cast<int>(dst_h), static_cast<int>(dst_w), CV_32FC1);
7572
const float sx = static_cast<float>(src_w) / static_cast<float>(dst_w);
@@ -78,8 +75,84 @@ void OpenCVTestData::makeRemap(uint32_t src_w, uint32_t src_h,
7875
auto* mx = mapX.ptr<float>(y);
7976
auto* my = mapY.ptr<float>(y);
8077
for (int x = 0; x < mapX.cols; ++x) {
81-
mx[x] = (x + 0.5f) * sx + 0.25f;
82-
my[x] = (y + 0.5f) * sy + 0.25f;
78+
mx[x] = (x + 0.5f) * sx - 0.5f;
79+
my[x] = (y + 0.5f) * sy - 0.5f;
80+
}
81+
}
82+
}
83+
84+
void OpenCVTestData::makeRemap(uint32_t src_w, uint32_t src_h,
85+
uint32_t dst_w, uint32_t dst_h,
86+
cv::Mat& mapX, cv::Mat& mapY,
87+
RemapPattern pattern) {
88+
// Build the requested pattern. By default use a radial lens-distortion
89+
// model so the benchmark exercises scattered, realistic memory access
90+
// rather than the cache-friendly identity path.
91+
if (pattern == RemapPattern::IDENTITY) {
92+
makeRemapIdentity(src_w, src_h, dst_w, dst_h, mapX, mapY);
93+
return;
94+
}
95+
96+
mapX.create(static_cast<int>(dst_h), static_cast<int>(dst_w), CV_32FC1);
97+
mapY.create(static_cast<int>(dst_h), static_cast<int>(dst_w), CV_32FC1);
98+
const float sx = static_cast<float>(src_w) / static_cast<float>(dst_w);
99+
const float sy = static_cast<float>(src_h) / static_cast<float>(dst_h);
100+
const float dst_wf = static_cast<float>(dst_w);
101+
const float dst_hf = static_cast<float>(dst_h);
102+
103+
if (pattern == RemapPattern::LENS_DISTORTION) {
104+
const float cx = dst_wf * 0.5f;
105+
const float cy = dst_hf * 0.5f;
106+
const float max_radius = 0.5f * std::sqrt(dst_wf * dst_wf + dst_hf * dst_hf);
107+
const float inv_max_r2 = 1.0f / (max_radius * max_radius + 1e-6f);
108+
const float k1 = 0.08f;
109+
const float k2 = 0.01f;
110+
for (int y = 0; y < mapX.rows; ++y) {
111+
auto* mx = mapX.ptr<float>(y);
112+
auto* my = mapY.ptr<float>(y);
113+
for (int x = 0; x < mapX.cols; ++x) {
114+
const float xf = static_cast<float>(x);
115+
const float yf = static_cast<float>(y);
116+
const float dx = xf - cx;
117+
const float dy = yf - cy;
118+
const float r2 = (dx * dx + dy * dy) * inv_max_r2;
119+
const float r4 = r2 * r2;
120+
const float scale = 1.0f + k1 * r2 + k2 * r4;
121+
const float src_x = ((xf * sx) - cx) * scale + cx;
122+
const float src_y = ((yf * sy) - cy) * scale + cy;
123+
// cv::remap expects subpixel coordinates; (x+0.5)*scale-0.5
124+
// is the standard convention, but here src_x/src_y already
125+
// represent destination-to-source mapping coordinates and
126+
// are used directly to match openvx-mark's convention.
127+
mx[x] = src_x;
128+
my[x] = src_y;
129+
}
130+
}
131+
} else { // RANDOM_OFFSETS
132+
// Seed a dedicated, deterministic RNG for this pattern so offsets are
133+
// reproducible regardless of how many other benchmarks ran before
134+
// this one. Mix the original global seed with dimensions and a
135+
// pattern tag so the map is stable across run order.
136+
const uint64_t seed = seed_ + static_cast<uint64_t>(src_w) * 73856093u +
137+
static_cast<uint64_t>(src_h) * 19349663u +
138+
static_cast<uint64_t>(dst_w) * 83492791u +
139+
static_cast<uint64_t>(dst_h) * 4256233u +
140+
0x9e3779b97f4a7c15ULL;
141+
std::mt19937_64 pattern_rng(seed);
142+
std::uniform_real_distribution<float> dist(-2.0f, 2.0f);
143+
const float src_wf = static_cast<float>(src_w);
144+
const float src_hf = static_cast<float>(src_h);
145+
for (int y = 0; y < mapX.rows; ++y) {
146+
auto* mx = mapX.ptr<float>(y);
147+
auto* my = mapY.ptr<float>(y);
148+
for (int x = 0; x < mapX.cols; ++x) {
149+
float rx = (x + 0.5f) * sx - 0.5f + dist(pattern_rng);
150+
float ry = (y + 0.5f) * sy - 0.5f + dist(pattern_rng);
151+
// Clamp to valid source bounds so border handling stays
152+
// consistent across implementations and runs.
153+
mx[x] = std::max(-0.5f, std::min(rx, src_wf - 0.5f));
154+
my[x] = std::max(-0.5f, std::min(ry, src_hf - 0.5f));
155+
}
83156
}
84157
}
85158
}

0 commit comments

Comments
 (0)