Commit fb3f5bc

[hdSt, hgi*] UMA and ReBAR support

1 parent a2a731c, commit fb3f5bc
File tree: 11 files changed, +333 -168 lines

pxr/imaging/hdSt/resourceRegistry.cpp

Lines changed: 11 additions & 9 deletions
@@ -843,15 +843,18 @@ HdStResourceRegistry::_Commit()
                     if (req.range && req.range->RequiresStaging()) {
                         const size_t numElements =
                             source->GetNumElements();
-                        // Avoid calling functions on
+                        // Avoid calling functions on
                         // HdNullBufferSources
                         if (numElements > 0) {
-                            stagingBufferSize += numElements *
+                            stagingBufferSize.fetch_add(
+                                numElements *
                                 HdDataSizeOfTupleType(
-                                source->GetTupleType());
+                                    source->GetTupleType()),
+                                std::memory_order_relaxed);
                         }
-                        stagingBufferSize +=
-                            _GetChainedStagingSize(source);
+                        stagingBufferSize.fetch_add(
+                            _GetChainedStagingSize(source),
+                            std::memory_order_relaxed);
                     }
                 }
             }
@@ -934,7 +937,8 @@ HdStResourceRegistry::_Commit()
         HD_TRACE_SCOPE("Copy");
         // 4. copy phase:
         //
-        _stagingBuffer->Resize(stagingBufferSize);
+        _stagingBuffer->Resize(
+            stagingBufferSize.load(std::memory_order_relaxed));
 
         for (_PendingSource &pendingSource : _pendingSources) {
             HdBufferArrayRangeSharedPtr &dstRange = pendingSource.range;
@@ -974,10 +978,8 @@ HdStResourceRegistry::_Commit()
     _uniformSsboAggregationStrategy->Flush();
     _singleAggregationStrategy->Flush();
 
-    _stagingBuffer->Flush();
-
     // Make sure the writes are visible to computations that follow
-    if (_blitCmds) {
+    if (_stagingBuffer->Flush() && _blitCmds) {
         _blitCmds->InsertMemoryBarrier(HgiMemoryBarrierAll);
     }
     SubmitBlitWork();
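
Note on the resourceRegistry.cpp change: stagingBufferSize is now tallied with atomic fetch_add using std::memory_order_relaxed, presumably because the sizes are summed from parallel work, and it is only read back with load() after that work has completed. Relaxed ordering is enough for a pure counter, since the counter is not used to publish other data. A minimal standalone sketch of the pattern (not USD code; the worker count and byte sizes are made up for illustration):

// Standalone sketch (not USD code): a byte total tallied from worker threads
// with relaxed atomic adds, then read once after the threads have joined.
#include <atomic>
#include <cstddef>
#include <iostream>
#include <thread>
#include <vector>

int main()
{
    std::atomic<size_t> stagingBufferSize{0};
    std::vector<std::thread> workers;

    for (int t = 0; t < 4; ++t) {
        workers.emplace_back([&stagingBufferSize] {
            for (int i = 0; i < 1000; ++i) {
                // Relaxed order is sufficient: the counter carries no
                // synchronization duty between the workers.
                stagingBufferSize.fetch_add(16, std::memory_order_relaxed);
            }
        });
    }
    for (std::thread &w : workers) {
        w.join();   // joining establishes the ordering needed for the read
    }

    // Comparable to _stagingBuffer->Resize(stagingBufferSize.load(...)).
    std::cout << stagingBufferSize.load(std::memory_order_relaxed) << "\n";
    return 0;
}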

pxr/imaging/hdSt/stagingBuffer.cpp

Lines changed: 34 additions & 47 deletions
@@ -17,9 +17,8 @@ HdStStagingBuffer::HdStStagingBuffer(HdStResourceRegistry *resourceRegistry)
   : _resourceRegistry(resourceRegistry)
   , _head(0)
   , _capacity(0)
-  , _activeSlot(0)
 {
-    _tripleBuffered = resourceRegistry->GetHgi()->GetCapabilities()->
+    _uniformMemoryAccess = resourceRegistry->GetHgi()->GetCapabilities()->
         IsSet(HgiDeviceCapabilitiesBitsUnifiedMemory);
 }
 
@@ -31,16 +30,12 @@ HdStStagingBuffer::~HdStStagingBuffer()
 void
 HdStStagingBuffer::Deallocate()
 {
-    Hgi* hgi = _resourceRegistry->GetHgi();
-
-    for (size_t i = 0; i < MULTIBUFFERING; ++i) {
-        if (_handles[i]) {
-            hgi->DestroyBuffer(&_handles[i]);
-        }
+    if (_buffer) {
+        _resourceRegistry->GetHgi()->DestroyBuffer(&_buffer);
+        _buffer = {};
     }
 
     _capacity = 0;
-    _activeSlot = 0;
 }
 
 void
@@ -65,41 +60,40 @@ HdStStagingBuffer::StageCopy(HgiBufferCpuToGpuOp const &copyOp)
         return;
     }
 
-    // When the to-be-copied data is 'large' doing the extra memcpy into the
+    // Skip staging buffer if device supports unified memory or when
+    // the to-be-copied data is 'large'. Doing the extra memcpy into the
     // stating buffer to avoid many small GPU buffer upload can be more
     // expensive than just submitting the CPU to GPU copy operation directly.
     // The value of 'queueThreshold' is estimated (when is the extra memcpy
    // into the staging buffer slower than immediately issuing a gpu upload)
-    static const int queueThreshold = 512*1024;
-    if (!_tripleBuffered && copyOp.byteSize > queueThreshold) {
+    static constexpr size_t queueThreshold = 512 * 1024;
+    if (_uniformMemoryAccess || copyOp.byteSize > queueThreshold) {
         HgiBlitCmds* blitCmds = _resourceRegistry->GetGlobalBlitCmds();
         blitCmds->CopyBufferCpuToGpu(copyOp);
         return;
     }
 
-    HgiBufferHandle buffer = _handles[_activeSlot];
-    constexpr size_t recoveryRatio = 4;
+    static constexpr size_t recoveryRatio = 4;
 
-    // If there is no buffer in the active slot or it is either too small or
+    // If there is no buffer or it is either too small or
     // substantially larger than the required size, recreate it.
-    if (!buffer ||
-        buffer->GetDescriptor().byteSize < _capacity ||
-        buffer->GetDescriptor().byteSize > _capacity * recoveryRatio) {
+    if (!_buffer ||
+        _buffer->GetDescriptor().byteSize < _capacity ||
+        _buffer->GetDescriptor().byteSize > _capacity * recoveryRatio) {
         HgiBufferDesc bufferDesc;
         bufferDesc.byteSize = _capacity;
 
         Hgi* hgi = _resourceRegistry->GetHgi();
 
-        if (buffer) {
-            hgi->DestroyBuffer(&buffer);
+        if (_buffer) {
+            hgi->DestroyBuffer(&_buffer);
         }
 
-        _handles[_activeSlot] = hgi->CreateBuffer(bufferDesc);
-        buffer = _handles[_activeSlot];
+        _buffer = hgi->CreateBuffer(bufferDesc);
     }
 
-    size_t capacity = buffer->GetDescriptor().byteSize;
-    uint8_t *cpuStaging = static_cast<uint8_t*>(buffer->GetCPUStagingAddress());
+    size_t capacity = _buffer->GetDescriptor().byteSize;
+    uint8_t *cpuStaging = static_cast<uint8_t*>(_buffer->GetCPUStagingAddress());
 
     if (TF_VERIFY(_head + copyOp.byteSize <= capacity)) {
         // Copy source into the staging buffer.
@@ -128,7 +122,7 @@ HdStStagingBuffer::StageCopy(HgiBufferCpuToGpuOp const &copyOp)
     // Create a GPU to GPU blit operation to do the final copy.
     HgiBufferGpuToGpuOp gpuCopy;
 
-    gpuCopy.gpuSourceBuffer = buffer;
+    gpuCopy.gpuSourceBuffer = _buffer;
     gpuCopy.sourceByteOffset = _head;
     gpuCopy.byteSize = copyOp.byteSize;
     gpuCopy.gpuDestinationBuffer = copyOp.gpuDestinationBuffer;
@@ -141,33 +135,29 @@ HdStStagingBuffer::StageCopy(HgiBufferCpuToGpuOp const &copyOp)
     }
 }
 
-void
+bool
 HdStStagingBuffer::Flush()
 {
     if (_head == 0) {
-        _gpuCopyOps.clear();
-        return;
+        // UMA case
+        return false;
     }
 
     HgiBlitCmds* blitCmds = _resourceRegistry->GetGlobalBlitCmds();
 
     blitCmds->PushDebugGroup(__ARCH_PRETTY_FUNCTION__);
 
-    if (!_tripleBuffered) {
-        // If this isn't UMA then blit the staging buffer to GPU.
-        HgiBufferCpuToGpuOp op;
-        HgiBufferHandle buffer = _handles[_activeSlot];
-        uint8_t* const cpuStaging = static_cast<uint8_t* const>(
-            buffer->GetCPUStagingAddress());
-
-        op.cpuSourceBuffer = cpuStaging;
-        op.sourceByteOffset = 0;
-        op.gpuDestinationBuffer = buffer;
-        op.destinationByteOffset = 0;
-        op.byteSize = _head;
-        blitCmds->CopyBufferCpuToGpu(op);
-        blitCmds->InsertMemoryBarrier(HgiMemoryBarrierAll);
-    }
+    HgiBufferCpuToGpuOp op;
+    uint8_t* const cpuStaging = static_cast<uint8_t* const>(
+        _buffer->GetCPUStagingAddress());
+
+    op.cpuSourceBuffer = cpuStaging;
+    op.sourceByteOffset = 0;
+    op.gpuDestinationBuffer = _buffer;
+    op.destinationByteOffset = 0;
+    op.byteSize = _head;
+    blitCmds->CopyBufferCpuToGpu(op);
+    blitCmds->InsertMemoryBarrier(HgiMemoryBarrierAll);
 
     for (auto const &copyOp : _gpuCopyOps) {
         blitCmds->CopyBufferGpuToGpu(copyOp);
@@ -178,10 +168,7 @@ HdStStagingBuffer::Flush()
     _gpuCopyOps.clear();
     _head = 0;
 
-    if (_tripleBuffered) {
-        _activeSlot++;
-        _activeSlot = (_activeSlot < MULTIBUFFERING) ? _activeSlot : 0;
-    }
+    return true;
 }
 
 PXR_NAMESPACE_CLOSE_SCOPE
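
Taken together, the stagingBuffer.cpp changes drop the triple-buffered _handles array in favor of a single _buffer, bypass the staging buffer entirely when the device reports unified memory (or when a copy exceeds queueThreshold), and make Flush() report whether anything was actually staged. Below is a simplified standalone model of that routing, not the real class: the Upload and GpuCopy structs are stand-ins for the Hgi ops, and growing the vector stands in for the preallocated staging storage.

// Simplified standalone model of the new StageCopy()/Flush() routing; the
// Upload and GpuCopy structs are illustrative stand-ins, not the Hgi API.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

struct Upload  { const void *src = nullptr; size_t byteSize = 0; };
struct GpuCopy { size_t srcOffset = 0; size_t byteSize = 0; };

class StagingModel
{
public:
    explicit StagingModel(bool unifiedMemory) : _uma(unifiedMemory) {}

    void StageCopy(Upload const &op)
    {
        static constexpr size_t queueThreshold = 512 * 1024;
        // UMA (or a copy larger than the threshold) skips staging and would
        // go straight to blitCmds->CopyBufferCpuToGpu(op) in the real code.
        if (_uma || op.byteSize > queueThreshold) {
            ++directUploads;
            return;
        }
        // Otherwise append into the single staging allocation and queue a
        // GPU-to-GPU copy to be replayed at Flush() time.
        const size_t offset = _staging.size();
        _staging.resize(offset + op.byteSize);
        std::memcpy(_staging.data() + offset, op.src, op.byteSize);
        _gpuCopies.push_back({offset, op.byteSize});
    }

    // Returns false when nothing was staged (the UMA / ReBAR case), so the
    // caller can skip its memory barrier.
    bool Flush()
    {
        if (_staging.empty()) {
            return false;
        }
        // The real code uploads the staging buffer once, inserts a barrier,
        // and replays the queued GPU-to-GPU copies at this point.
        _gpuCopies.clear();
        _staging.clear();
        return true;
    }

    int directUploads = 0;

private:
    bool _uma;
    std::vector<uint8_t> _staging;
    std::vector<GpuCopy> _gpuCopies;
};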

pxr/imaging/hdSt/stagingBuffer.h

Lines changed: 7 additions & 8 deletions
@@ -55,20 +55,19 @@ class HdStStagingBuffer
     HDST_API
     void StageCopy(HgiBufferCpuToGpuOp const &copyOp);
 
-    /// Flush the queued GPU to GPU blits from the calls to StageCopy. Resets
-    /// the state for the next ResoureRegistry commit.
+    /// Flush the queued GPU to GPU blits from the calls to StageCopy.
+    /// Resets the state for the next ResoureRegistry commit.
+    /// Returns false if there were no staged copies to flush. This is the
+    /// case when UMA or ReBAR is available.
     HDST_API
-    void Flush();
+    bool Flush();
 
 private:
-    static constexpr int32_t MULTIBUFFERING = 3;
-
     HdStResourceRegistry *_resourceRegistry;
-    HgiBufferHandle _handles[MULTIBUFFERING];
+    HgiBufferHandle _buffer;
     size_t _head;
     size_t _capacity;
-    size_t _activeSlot;
-    bool _tripleBuffered;
+    bool _uniformMemoryAccess;
     std::vector<HgiBufferGpuToGpuOp> _gpuCopyOps;
 };
 
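
For callers, the new bool return value is what allows the memory barrier to be skipped on UMA/ReBAR systems. The resourceRegistry.cpp hunk above already uses it; roughly, the intended caller pattern is (sketch only, names as in that hunk):

if (_stagingBuffer->Flush() && _blitCmds) {
    // Only needed when a staged CPU-to-GPU upload was actually recorded.
    _blitCmds->InsertMemoryBarrier(HgiMemoryBarrierAll);
}
SubmitBlitWork();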
