Skip to content

Commit 75c3b52

Browse files
committed
gdal raster compare: modify CompareVectors() to be autovectorizer friendly with ICX, GCC and clang
1 parent cadb5ee commit 75c3b52

File tree

1 file changed

+61
-15
lines changed

1 file changed

+61
-15
lines changed

apps/gdalalg_raster_compare.cpp

Lines changed: 61 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,11 @@ void GDALRasterCompareAlgorithm::GeoTransformComparison(
299299
}
300300
}
301301

302+
#if defined(__GNUC__) && !defined(__clang__)
303+
#pragma GCC push_options
304+
#pragma GCC optimize("O3")
305+
#endif
306+
302307
/************************************************************************/
303308
/* Diff() */
304309
/************************************************************************/
@@ -317,8 +322,7 @@ static void CompareVectors(size_t nValCount, const T *refValues,
317322
const T *inputValues, uint64_t &countDiffPixels,
318323
Tdiff &maxDiffValue)
319324
{
320-
constexpr bool bIsFloatingPoint =
321-
std::is_same_v<T, float> || std::is_same_v<T, double>;
325+
constexpr bool bIsFloatingPoint = std::is_floating_point_v<T>;
322326
if constexpr (bIsComplex)
323327
{
324328
for (size_t i = 0; i < nValCount; ++i)
@@ -431,11 +435,11 @@ static void CompareVectors(size_t nValCount, const T *refValues,
431435
}
432436
}
433437
#endif
434-
for (; i < nValCount; ++i)
438+
if constexpr (bIsFloatingPoint)
435439
{
436-
if constexpr (bIsFloatingPoint)
440+
static_assert(std::is_same_v<T, Tdiff>);
441+
for (; i < nValCount; ++i)
437442
{
438-
static_assert(std::is_same_v<T, Tdiff>);
439443
if (std::isnan(refValues[i]))
440444
{
441445
if (!std::isnan(inputValues[i]))
@@ -453,18 +457,56 @@ static void CompareVectors(size_t nValCount, const T *refValues,
453457
{
454458
continue;
455459
}
456-
}
457460

458-
const Tdiff diff = refValues[i] >= inputValues[i]
459-
? Diff(static_cast<Tdiff>(refValues[i]),
460-
static_cast<Tdiff>(inputValues[i]))
461-
: Diff(static_cast<Tdiff>(inputValues[i]),
462-
static_cast<Tdiff>(refValues[i]));
463-
if (diff > 0)
461+
const Tdiff diff =
462+
refValues[i] >= inputValues[i]
463+
? Diff(static_cast<Tdiff>(refValues[i]),
464+
static_cast<Tdiff>(inputValues[i]))
465+
: Diff(static_cast<Tdiff>(inputValues[i]),
466+
static_cast<Tdiff>(refValues[i]));
467+
if (diff > 0)
468+
{
469+
++countDiffPixels;
470+
if (diff > maxDiffValue)
471+
maxDiffValue = diff;
472+
}
473+
}
474+
}
475+
else
476+
{
477+
static_assert(std::is_unsigned_v<Tdiff>);
478+
while (i < nValCount)
464479
{
465-
++countDiffPixels;
466-
if (diff > maxDiffValue)
467-
maxDiffValue = diff;
480+
// Autovectorizer friendly inner loop (GCC, clang, ICX),
481+
// by accumulating the count in countDiffLocal, which has the same size
482+
// as Tdiff.
483+
484+
Tdiff countDiffLocal = 0;
485+
const size_t innerLimit = [i, nValCount]()
486+
{
487+
if constexpr (sizeof(Tdiff) < sizeof(size_t))
488+
{
489+
return std::min(nValCount - 1,
490+
i + std::numeric_limits<Tdiff>::max());
491+
}
492+
else
493+
{
494+
(void)i;
495+
return nValCount - 1;
496+
}
497+
}();
498+
for (; i <= innerLimit; ++i)
499+
{
500+
const Tdiff diff =
501+
refValues[i] >= inputValues[i]
502+
? Diff(static_cast<Tdiff>(refValues[i]),
503+
static_cast<Tdiff>(inputValues[i]))
504+
: Diff(static_cast<Tdiff>(inputValues[i]),
505+
static_cast<Tdiff>(refValues[i]));
506+
countDiffLocal += (diff > 0);
507+
maxDiffValue = std::max(maxDiffValue, diff);
508+
}
509+
countDiffPixels += countDiffLocal;
468510
}
469511
}
470512
}
@@ -951,6 +993,10 @@ static void ComparePixels(std::vector<std::string> &aosReport,
951993
}
952994
}
953995

996+
#if defined(__GNUC__) && !defined(__clang__)
997+
#pragma GCC pop_options
998+
#endif
999+
9541000
/************************************************************************/
9551001
/* GDALRasterCompareAlgorithm::BandComparison() */
9561002
/************************************************************************/

0 commit comments

Comments
 (0)