Skip to content

Commit b4ef97f

Browse files
committed
PERF: OOC optimization for AlignSections family (Group E)
Adds bulk I/O OOC algorithm paths for the 4 AlignSections filters using copyIntoBuffer/copyFromBuffer, cached ensemble arrays, and bulk mask reads. Tests exercise both in-core and OOC paths via GENERATE + ForceOocAlgorithmGuard. Algorithm changes: - AlignSections base: AlignSectionsTransferDataOocImpl with bulk slice read/write - AlignSectionsMisorientation: findShiftsOoc with 2-slice buffer + std::swap - AlignSectionsMutualInformation: formFeaturesSectionsOoc with 1-slice CCL buffer - AlignSectionsFeatureCentroid: findShiftsOoc with bulk mask reads per Z-slice Infrastructure changes (SimplnxOoc plugin): - Z-slice-aligned default chunk shape {1,Y,X} for 3D data - copyFromBuffer fast path: skip read-modify-write for tuple-aligned writes - copyIntoBuffer fast path: direct span-based readTuples for tuple-aligned reads
1 parent 91efc23 commit b4ef97f

12 files changed

Lines changed: 1336 additions & 227 deletions

File tree

src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/AlignSectionsMisorientation.cpp

Lines changed: 261 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,12 @@
33
#include "simplnx/Common/Numbers.hpp"
44
#include "simplnx/DataStructure/DataGroup.hpp"
55
#include "simplnx/DataStructure/Geometry/IGridGeometry.hpp"
6+
#include "simplnx/Utilities/AlgorithmDispatch.hpp"
67
#include "simplnx/Utilities/FilterUtilities.hpp"
78
#include "simplnx/Utilities/MaskCompareUtilities.hpp"
89

910
#include <EbsdLib/LaueOps/LaueOps.h>
1011

11-
#include <iostream>
12-
1312
using namespace nx::core;
1413

1514
// -----------------------------------------------------------------------------
@@ -41,6 +40,15 @@ Result<> AlignSectionsMisorientation::operator()()
4140
// -----------------------------------------------------------------------------
4241
Result<> AlignSectionsMisorientation::findShifts(std::vector<int64_t>& xShifts, std::vector<int64_t>& yShifts)
4342
{
43+
{
44+
const auto& quatsCheck = m_DataStructure.getDataRefAs<Float32Array>(m_InputValues->quatsArrayPath);
45+
const auto& cellPhasesCheck = m_DataStructure.getDataRefAs<Int32Array>(m_InputValues->cellPhasesArrayPath);
46+
if(ForceOocAlgorithm() || IsOutOfCore(quatsCheck) || IsOutOfCore(cellPhasesCheck))
47+
{
48+
return findShiftsOoc(xShifts, yShifts);
49+
}
50+
}
51+
4452
std::unique_ptr<MaskCompareUtilities::MaskCompare> maskCompare = nullptr;
4553
if(m_InputValues->UseMask)
4654
{
@@ -88,10 +96,6 @@ Result<> AlignSectionsMisorientation::findShifts(std::vector<int64_t>& xShifts,
8896
// Loop over the Z Direction
8997
for(int64_t iter = 1; iter < dims[2]; iter++)
9098
{
91-
if(m_ShouldCancel)
92-
{
93-
return {};
94-
}
9599
throttledMessenger.sendThrottledMessage([&]() { return fmt::format("Determining Shifts || {:.2f}% Complete", CalculatePercentComplete(iter, dims[2])); });
96100
if(getCancel())
97101
{
@@ -298,3 +302,254 @@ Result<> AlignSectionsMisorientation::findShifts(std::vector<int64_t>& xShifts,
298302

299303
return {};
300304
}
305+
306+
// -----------------------------------------------------------------------------
307+
// OOC-optimized findShifts: buffers 2 adjacent Z-slices of quats, cellPhases,
308+
// and mask into local vectors before the convergence loop, eliminating random
309+
// chunk-based DataStore access.
310+
// -----------------------------------------------------------------------------
311+
Result<> AlignSectionsMisorientation::findShiftsOoc(std::vector<int64_t>& xShifts, std::vector<int64_t>& yShifts)
312+
{
313+
// For OOC mask buffering, get the raw mask store for bulk reads instead of per-element isTrue()
314+
const AbstractDataStore<uint8>* maskUInt8StorePtr = nullptr;
315+
const AbstractDataStore<bool>* maskBoolStorePtr = nullptr;
316+
if(m_InputValues->UseMask)
317+
{
318+
const auto& maskArray = m_DataStructure.getDataRefAs<IDataArray>(m_InputValues->MaskArrayPath);
319+
if(maskArray.getDataType() == DataType::uint8)
320+
{
321+
maskUInt8StorePtr = &dynamic_cast<const DataArray<uint8>&>(maskArray).getDataStoreRef();
322+
}
323+
else if(maskArray.getDataType() == DataType::boolean)
324+
{
325+
maskBoolStorePtr = &dynamic_cast<const DataArray<bool>&>(maskArray).getDataStoreRef();
326+
}
327+
else
328+
{
329+
return MakeErrorResult(-53900, fmt::format("Mask Array is not Bool or UInt8: {}", m_InputValues->MaskArrayPath.toString()));
330+
}
331+
}
332+
333+
auto* gridGeom = m_DataStructure.getDataAs<IGridGeometry>(m_InputValues->ImageGeometryPath);
334+
335+
const auto& cellPhases = m_DataStructure.getDataRefAs<Int32Array>(m_InputValues->cellPhasesArrayPath);
336+
const auto& quats = m_DataStructure.getDataRefAs<Float32Array>(m_InputValues->quatsArrayPath);
337+
const auto& crystalStructuresArray = m_DataStructure.getDataRefAs<UInt32Array>(m_InputValues->crystalStructuresArrayPath);
338+
auto& cellPhasesStore = cellPhases.getDataStoreRef();
339+
auto& quatsStore = quats.getDataStoreRef();
340+
341+
// Cache ensemble-level array locally to avoid per-element virtual dispatch in hot loop
342+
const auto& crystalStructuresStore = crystalStructuresArray.getDataStoreRef();
343+
std::vector<uint32> crystalStructures(crystalStructuresStore.getSize());
344+
crystalStructuresStore.copyIntoBuffer(0, nonstd::span<uint32>(crystalStructures.data(), crystalStructures.size()));
345+
346+
SizeVec3 udims = gridGeom->getDimensions();
347+
348+
std::array<int64_t, 3> dims = {
349+
static_cast<int64_t>(udims[0]),
350+
static_cast<int64_t>(udims[1]),
351+
static_cast<int64_t>(udims[2]),
352+
};
353+
354+
std::vector<ebsdlib::LaueOps::Pointer> orientationOps = ebsdlib::LaueOps::GetAllOrientationOps();
355+
356+
std::vector<bool> misorients(dims[0] * dims[1], false);
357+
358+
const auto halfDim0 = static_cast<int64_t>(dims[0] * 0.5f);
359+
const auto halfDim1 = static_cast<int64_t>(dims[1] * 0.5f);
360+
361+
double deg2Rad = (nx::core::numbers::pi / 180.0);
362+
ThrottledMessenger throttledMessenger = getMessageHelper().createThrottledMessenger();
363+
364+
const int64_t sliceVoxels = dims[0] * dims[1];
365+
366+
// Buffers for 2 Z-slices: reference (slice+1) and current (slice)
367+
std::vector<float32> refQuatsBuf(sliceVoxels * 4);
368+
std::vector<float32> curQuatsBuf(sliceVoxels * 4);
369+
std::vector<int32_t> refPhasesBuf(sliceVoxels);
370+
std::vector<int32_t> curPhasesBuf(sliceVoxels);
371+
std::vector<uint8_t> refMaskBuf;
372+
std::vector<uint8_t> curMaskBuf;
373+
if(m_InputValues->UseMask)
374+
{
375+
refMaskBuf.resize(sliceVoxels, 1);
376+
curMaskBuf.resize(sliceVoxels, 1);
377+
}
378+
379+
// Optional output stores
380+
AbstractDataStore<uint32>* slicesStorePtr = nullptr;
381+
AbstractDataStore<int64>* relativeShiftsStorePtr = nullptr;
382+
AbstractDataStore<int64>* cumulativeShiftsStorePtr = nullptr;
383+
if(m_InputValues->StoreAlignmentShifts)
384+
{
385+
slicesStorePtr = &m_DataStructure.getDataAs<UInt32Array>(m_InputValues->SlicesArrayPath)->getDataStoreRef();
386+
relativeShiftsStorePtr = &m_DataStructure.getDataAs<Int64Array>(m_InputValues->RelativeShiftsArrayPath)->getDataStoreRef();
387+
cumulativeShiftsStorePtr = &m_DataStructure.getDataAs<Int64Array>(m_InputValues->CumulativeShiftsArrayPath)->getDataStoreRef();
388+
}
389+
390+
// Pre-load the first reference slice (the top-most Z-slice) via bulk read
391+
{
392+
int64_t firstRefOffset = (dims[2] - 1) * sliceVoxels;
393+
cellPhasesStore.copyIntoBuffer(firstRefOffset, nonstd::span<int32_t>(refPhasesBuf.data(), sliceVoxels));
394+
quatsStore.copyIntoBuffer(firstRefOffset * 4, nonstd::span<float32>(refQuatsBuf.data(), sliceVoxels * 4));
395+
if(m_InputValues->UseMask)
396+
{
397+
if(maskUInt8StorePtr != nullptr)
398+
{
399+
maskUInt8StorePtr->copyIntoBuffer(firstRefOffset, nonstd::span<uint8>(refMaskBuf.data(), sliceVoxels));
400+
}
401+
else if(maskBoolStorePtr != nullptr)
402+
{
403+
auto boolBuf = std::make_unique<bool[]>(sliceVoxels);
404+
maskBoolStorePtr->copyIntoBuffer(firstRefOffset, nonstd::span<bool>(boolBuf.get(), sliceVoxels));
405+
for(int64_t idx = 0; idx < sliceVoxels; idx++)
406+
{
407+
refMaskBuf[idx] = boolBuf[idx] ? 1 : 0;
408+
}
409+
}
410+
}
411+
}
412+
413+
for(int64_t iter = 1; iter < dims[2]; iter++)
414+
{
415+
throttledMessenger.sendThrottledMessage([&]() { return fmt::format("Determining Shifts || {:.2f}% Complete", CalculatePercentComplete(iter, dims[2])); });
416+
if(getCancel())
417+
{
418+
return {};
419+
}
420+
421+
int64_t slice = (dims[2] - 1) - iter;
422+
423+
// Bulk-read current slice (reference available from pre-load or previous iteration swap)
424+
int64_t curOffset = slice * sliceVoxels;
425+
cellPhasesStore.copyIntoBuffer(curOffset, nonstd::span<int32_t>(curPhasesBuf.data(), sliceVoxels));
426+
quatsStore.copyIntoBuffer(curOffset * 4, nonstd::span<float32>(curQuatsBuf.data(), sliceVoxels * 4));
427+
if(m_InputValues->UseMask)
428+
{
429+
if(maskUInt8StorePtr != nullptr)
430+
{
431+
maskUInt8StorePtr->copyIntoBuffer(curOffset, nonstd::span<uint8>(curMaskBuf.data(), sliceVoxels));
432+
}
433+
else if(maskBoolStorePtr != nullptr)
434+
{
435+
auto boolBuf = std::make_unique<bool[]>(sliceVoxels);
436+
maskBoolStorePtr->copyIntoBuffer(curOffset, nonstd::span<bool>(boolBuf.get(), sliceVoxels));
437+
for(int64_t idx = 0; idx < sliceVoxels; idx++)
438+
{
439+
curMaskBuf[idx] = boolBuf[idx] ? 1 : 0;
440+
}
441+
}
442+
}
443+
444+
float minDisorientation = std::numeric_limits<float>::max();
445+
int64_t oldxshift = -1;
446+
int64_t oldyshift = -1;
447+
int64_t newxshift = 0;
448+
int64_t newyshift = 0;
449+
450+
std::fill(misorients.begin(), misorients.end(), false);
451+
452+
float misorientationTolerance = static_cast<float>(m_InputValues->misorientationTolerance * deg2Rad);
453+
454+
while(newxshift != oldxshift || newyshift != oldyshift)
455+
{
456+
oldxshift = newxshift;
457+
oldyshift = newyshift;
458+
for(int32_t j = -3; j < 4; j++)
459+
{
460+
for(int32_t k = -3; k < 4; k++)
461+
{
462+
float disorientation = 0.0f;
463+
float count = 0.0f;
464+
int64_t xIdx = k + oldxshift + halfDim0;
465+
int64_t yIdx = j + oldyshift + halfDim1;
466+
int64_t idx = (dims[0] * yIdx) + xIdx;
467+
if(!misorients[idx] && llabs(k + oldxshift) < halfDim0 && llabs(j + oldyshift) < halfDim1)
468+
{
469+
for(int64_t l = 0; l < dims[1]; l = l + 4)
470+
{
471+
for(int64_t n = 0; n < dims[0]; n = n + 4)
472+
{
473+
if((l + j + oldyshift) >= 0 && (l + j + oldyshift) < dims[1] && (n + k + oldxshift) >= 0 && (n + k + oldxshift) < dims[0])
474+
{
475+
count++;
476+
// Local buffer indices (within-slice)
477+
int64_t refLocalIdx = l * dims[0] + n;
478+
int64_t curLocalIdx = (l + j + oldyshift) * dims[0] + (n + k + oldxshift);
479+
480+
bool maskOk = !m_InputValues->UseMask || (refMaskBuf[refLocalIdx] != 0 && curMaskBuf[curLocalIdx] != 0);
481+
if(maskOk)
482+
{
483+
float angle = std::numeric_limits<float>::max();
484+
if(refPhasesBuf[refLocalIdx] > 0 && curPhasesBuf[curLocalIdx] > 0)
485+
{
486+
ebsdlib::QuatD quat1(refQuatsBuf[refLocalIdx * 4], refQuatsBuf[refLocalIdx * 4 + 1], refQuatsBuf[refLocalIdx * 4 + 2], refQuatsBuf[refLocalIdx * 4 + 3]);
487+
auto laueClass1 = static_cast<int32_t>(crystalStructures[refPhasesBuf[refLocalIdx]]);
488+
ebsdlib::QuatD quat2(curQuatsBuf[curLocalIdx * 4], curQuatsBuf[curLocalIdx * 4 + 1], curQuatsBuf[curLocalIdx * 4 + 2], curQuatsBuf[curLocalIdx * 4 + 3]);
489+
auto laueClass2 = static_cast<int32_t>(crystalStructures[curPhasesBuf[curLocalIdx]]);
490+
if(laueClass1 == laueClass2 && laueClass1 < static_cast<uint32_t>(orientationOps.size()))
491+
{
492+
ebsdlib::AxisAngleDType axisAngle = orientationOps[laueClass1]->calculateMisorientation(quat1, quat2);
493+
angle = axisAngle[3];
494+
}
495+
}
496+
if(angle > misorientationTolerance)
497+
{
498+
disorientation++;
499+
}
500+
}
501+
if(m_InputValues->UseMask)
502+
{
503+
if(refMaskBuf[refLocalIdx] != 0 && curMaskBuf[curLocalIdx] == 0)
504+
{
505+
disorientation++;
506+
}
507+
if(refMaskBuf[refLocalIdx] == 0 && curMaskBuf[curLocalIdx] != 0)
508+
{
509+
disorientation++;
510+
}
511+
}
512+
}
513+
}
514+
}
515+
disorientation = disorientation / count;
516+
xIdx = k + oldxshift + halfDim0;
517+
yIdx = j + oldyshift + halfDim1;
518+
idx = (dims[0] * yIdx) + xIdx;
519+
misorients[idx] = true;
520+
if(disorientation < minDisorientation || (disorientation == minDisorientation && ((llabs(k + oldxshift) < llabs(newxshift)) || (llabs(j + oldyshift) < llabs(newyshift)))))
521+
{
522+
newxshift = k + oldxshift;
523+
newyshift = j + oldyshift;
524+
minDisorientation = disorientation;
525+
}
526+
}
527+
}
528+
}
529+
}
530+
xShifts[iter] = xShifts[iter - 1] + newxshift;
531+
yShifts[iter] = yShifts[iter - 1] + newyshift;
532+
533+
if(m_InputValues->StoreAlignmentShifts)
534+
{
535+
usize xIndex = iter * 2;
536+
usize yIndex = (iter * 2) + 1;
537+
(*slicesStorePtr)[xIndex] = slice;
538+
(*slicesStorePtr)[yIndex] = slice + 1;
539+
(*relativeShiftsStorePtr)[xIndex] = newxshift;
540+
(*relativeShiftsStorePtr)[yIndex] = newyshift;
541+
(*cumulativeShiftsStorePtr)[xIndex] = xShifts[iter];
542+
(*cumulativeShiftsStorePtr)[yIndex] = yShifts[iter];
543+
}
544+
545+
// Current slice becomes the reference for the next iteration (O(1) pointer swap)
546+
std::swap(refQuatsBuf, curQuatsBuf);
547+
std::swap(refPhasesBuf, curPhasesBuf);
548+
if(m_InputValues->UseMask)
549+
{
550+
std::swap(refMaskBuf, curMaskBuf);
551+
}
552+
}
553+
554+
return {};
555+
}

src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/AlignSectionsMisorientation.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,15 @@ class ORIENTATIONANALYSIS_EXPORT AlignSectionsMisorientation : public AlignSecti
6060
Result<> findShifts(std::vector<int64_t>& xShifts, std::vector<int64_t>& yShifts) override;
6161

6262
private:
63+
/**
64+
* @brief OOC-optimized variant of findShifts that buffers two adjacent Z-slices
65+
* into local vectors before the convergence loop, eliminating per-tuple chunk thrashing.
66+
* @param xShifts Output vector of cumulative X shifts per slice.
67+
* @param yShifts Output vector of cumulative Y shifts per slice.
68+
* @return Success or error result.
69+
*/
70+
Result<> findShiftsOoc(std::vector<int64_t>& xShifts, std::vector<int64_t>& yShifts);
71+
6372
DataStructure& m_DataStructure;
6473
const AlignSectionsMisorientationInputValues* m_InputValues = nullptr;
6574
const std::atomic_bool& m_ShouldCancel;

0 commit comments

Comments
 (0)