@@ -299,6 +299,11 @@ void GDALRasterCompareAlgorithm::GeoTransformComparison(
299299 }
300300}
301301
302+ #if defined(__GNUC__) && !defined(__clang__)
303+ #pragma GCC push_options
304+ #pragma GCC optimize("O3")
305+ #endif
306+
302307/* ***********************************************************************/
303308/* Diff() */
304309/* ***********************************************************************/
@@ -317,8 +322,7 @@ static void CompareVectors(size_t nValCount, const T *refValues,
317322 const T *inputValues, uint64_t &countDiffPixels,
318323 Tdiff &maxDiffValue)
319324{
320- constexpr bool bIsFloatingPoint =
321- std::is_same_v<T, float > || std::is_same_v<T, double >;
325+ constexpr bool bIsFloatingPoint = std::is_floating_point_v<T>;
322326 if constexpr (bIsComplex)
323327 {
324328 for (size_t i = 0 ; i < nValCount; ++i)
@@ -431,11 +435,11 @@ static void CompareVectors(size_t nValCount, const T *refValues,
431435 }
432436 }
433437#endif
434- for (; i < nValCount; ++i )
438+ if constexpr (bIsFloatingPoint )
435439 {
436- if constexpr (bIsFloatingPoint)
440+ static_assert (std::is_same_v<T, Tdiff>);
441+ for (; i < nValCount; ++i)
437442 {
438- static_assert (std::is_same_v<T, Tdiff>);
439443 if (std::isnan (refValues[i]))
440444 {
441445 if (!std::isnan (inputValues[i]))
@@ -453,18 +457,56 @@ static void CompareVectors(size_t nValCount, const T *refValues,
453457 {
454458 continue ;
455459 }
456- }
457460
458- const Tdiff diff = refValues[i] >= inputValues[i]
459- ? Diff (static_cast <Tdiff>(refValues[i]),
460- static_cast <Tdiff>(inputValues[i]))
461- : Diff (static_cast <Tdiff>(inputValues[i]),
462- static_cast <Tdiff>(refValues[i]));
463- if (diff > 0 )
461+ const Tdiff diff =
462+ refValues[i] >= inputValues[i]
463+ ? Diff (static_cast <Tdiff>(refValues[i]),
464+ static_cast <Tdiff>(inputValues[i]))
465+ : Diff (static_cast <Tdiff>(inputValues[i]),
466+ static_cast <Tdiff>(refValues[i]));
467+ if (diff > 0 )
468+ {
469+ ++countDiffPixels;
470+ if (diff > maxDiffValue)
471+ maxDiffValue = diff;
472+ }
473+ }
474+ }
475+ else
476+ {
477+ static_assert (std::is_unsigned_v<Tdiff>);
478+ while (i < nValCount)
464479 {
465- ++countDiffPixels;
466- if (diff > maxDiffValue)
467- maxDiffValue = diff;
480+ // Autovectorizer-friendly inner loop (GCC, clang, ICX): the
481+ // per-pass counter countDiffLocal has the same width as Tdiff,
482+ // and is added to countDiffPixels after each pass.
483+
484+ Tdiff countDiffLocal = 0 ;
485+ const size_t innerLimit = [i, nValCount]()
486+ {
487+ if constexpr (sizeof (Tdiff) < sizeof (size_t ))
488+ {
489+ return std::min (nValCount - 1 ,
490+ i + std::numeric_limits<Tdiff>::max ());
491+ }
492+ else
493+ {
494+ (void )i;
495+ return nValCount - 1 ;
496+ }
497+ }();
498+ for (; i <= innerLimit; ++i)
499+ {
500+ const Tdiff diff =
501+ refValues[i] >= inputValues[i]
502+ ? Diff (static_cast <Tdiff>(refValues[i]),
503+ static_cast <Tdiff>(inputValues[i]))
504+ : Diff (static_cast <Tdiff>(inputValues[i]),
505+ static_cast <Tdiff>(refValues[i]));
506+ countDiffLocal += (diff > 0 );
507+ maxDiffValue = std::max (maxDiffValue, diff);
508+ }
509+ countDiffPixels += countDiffLocal;
468510 }
469511 }
470512 }
@@ -951,6 +993,10 @@ static void ComparePixels(std::vector<std::string> &aosReport,
951993 }
952994}
953995
996+ #if defined(__GNUC__) && !defined(__clang__)
997+ #pragma GCC pop_options
998+ #endif
999+
9541000/* ***********************************************************************/
9551001/* GDALRasterCompareAlgorithm::BandComparison() */
9561002/* ***********************************************************************/
0 commit comments