@@ -17,9 +17,8 @@ HdStStagingBuffer::HdStStagingBuffer(HdStResourceRegistry *resourceRegistry)
     : _resourceRegistry(resourceRegistry)
     , _head(0)
     , _capacity(0)
-    , _activeSlot(0)
 {
-    _tripleBuffered = resourceRegistry->GetHgi()->GetCapabilities()->
+    _uniformMemoryAccess = resourceRegistry->GetHgi()->GetCapabilities()->
         IsSet(HgiDeviceCapabilitiesBitsUnifiedMemory);
 }
 
@@ -31,16 +30,12 @@ HdStStagingBuffer::~HdStStagingBuffer()
 void
 HdStStagingBuffer::Deallocate()
 {
-    Hgi* hgi = _resourceRegistry->GetHgi();
-
-    for (size_t i = 0; i < MULTIBUFFERING; ++i) {
-        if (_handles[i]) {
-            hgi->DestroyBuffer(&_handles[i]);
-        }
+    if (_buffer) {
+        _resourceRegistry->GetHgi()->DestroyBuffer(&_buffer);
+        _buffer = {};
     }
 
     _capacity = 0;
-    _activeSlot = 0;
 }
 
 void
@@ -65,41 +60,40 @@ HdStStagingBuffer::StageCopy(HgiBufferCpuToGpuOp const &copyOp)
         return;
     }
 
-    // When the to-be-copied data is 'large' doing the extra memcpy into the
+    // Skip the staging buffer if the device supports unified memory or when
+    // the to-be-copied data is 'large'. Doing the extra memcpy into the
     // staging buffer to avoid many small GPU buffer uploads can be more
     // expensive than just submitting the CPU to GPU copy operation directly.
     // The value of 'queueThreshold' is estimated (when is the extra memcpy
     // into the staging buffer slower than immediately issuing a gpu upload).
-    static const int queueThreshold = 512 * 1024;
-    if (!_tripleBuffered && copyOp.byteSize > queueThreshold) {
+    static constexpr size_t queueThreshold = 512 * 1024;
+    if (_uniformMemoryAccess || copyOp.byteSize > queueThreshold) {
         HgiBlitCmds* blitCmds = _resourceRegistry->GetGlobalBlitCmds();
         blitCmds->CopyBufferCpuToGpu(copyOp);
         return;
     }
 
-    HgiBufferHandle buffer = _handles[_activeSlot];
-    constexpr size_t recoveryRatio = 4;
+    static constexpr size_t recoveryRatio = 4;
 
-    // If there is no buffer in the active slot or it is either too small or
+    // If there is no buffer or it is either too small or
     // substantially larger than the required size, recreate it.
-    if (!buffer ||
-        buffer->GetDescriptor().byteSize < _capacity ||
-        buffer->GetDescriptor().byteSize > _capacity * recoveryRatio) {
+    if (!_buffer ||
+        _buffer->GetDescriptor().byteSize < _capacity ||
+        _buffer->GetDescriptor().byteSize > _capacity * recoveryRatio) {
         HgiBufferDesc bufferDesc;
         bufferDesc.byteSize = _capacity;
 
         Hgi* hgi = _resourceRegistry->GetHgi();
 
-        if (buffer) {
-            hgi->DestroyBuffer(&buffer);
+        if (_buffer) {
+            hgi->DestroyBuffer(&_buffer);
         }
 
-        _handles[_activeSlot] = hgi->CreateBuffer(bufferDesc);
-        buffer = _handles[_activeSlot];
+        _buffer = hgi->CreateBuffer(bufferDesc);
     }
 
-    size_t capacity = buffer->GetDescriptor().byteSize;
-    uint8_t *cpuStaging = static_cast<uint8_t *>(buffer->GetCPUStagingAddress());
+    size_t capacity = _buffer->GetDescriptor().byteSize;
+    uint8_t *cpuStaging = static_cast<uint8_t *>(_buffer->GetCPUStagingAddress());
 
     if (TF_VERIFY(_head + copyOp.byteSize <= capacity)) {
         // Copy source into the staging buffer.
@@ -128,7 +122,7 @@ HdStStagingBuffer::StageCopy(HgiBufferCpuToGpuOp const &copyOp)
         // Create a GPU to GPU blit operation to do the final copy.
         HgiBufferGpuToGpuOp gpuCopy;
 
-        gpuCopy.gpuSourceBuffer = buffer;
+        gpuCopy.gpuSourceBuffer = _buffer;
         gpuCopy.sourceByteOffset = _head;
         gpuCopy.byteSize = copyOp.byteSize;
         gpuCopy.gpuDestinationBuffer = copyOp.gpuDestinationBuffer;
@@ -141,33 +135,29 @@ HdStStagingBuffer::StageCopy(HgiBufferCpuToGpuOp const &copyOp)
     }
 }
 
-void
+bool
 HdStStagingBuffer::Flush()
 {
     if (_head == 0) {
-        _gpuCopyOps.clear();
-        return;
+        // Nothing was staged (e.g. the unified memory case).
+        return false;
     }
 
     HgiBlitCmds* blitCmds = _resourceRegistry->GetGlobalBlitCmds();
 
     blitCmds->PushDebugGroup(__ARCH_PRETTY_FUNCTION__);
 
-    if (!_tripleBuffered) {
-        // If this isn't UMA then blit the staging buffer to GPU.
-        HgiBufferCpuToGpuOp op;
-        HgiBufferHandle buffer = _handles[_activeSlot];
-        uint8_t * const cpuStaging = static_cast<uint8_t * const>(
-            buffer->GetCPUStagingAddress());
-
-        op.cpuSourceBuffer = cpuStaging;
-        op.sourceByteOffset = 0;
-        op.gpuDestinationBuffer = buffer;
-        op.destinationByteOffset = 0;
-        op.byteSize = _head;
-        blitCmds->CopyBufferCpuToGpu(op);
-        blitCmds->InsertMemoryBarrier(HgiMemoryBarrierAll);
-    }
+    HgiBufferCpuToGpuOp op;
+    uint8_t * const cpuStaging = static_cast<uint8_t * const>(
+        _buffer->GetCPUStagingAddress());
+
+    op.cpuSourceBuffer = cpuStaging;
+    op.sourceByteOffset = 0;
+    op.gpuDestinationBuffer = _buffer;
+    op.destinationByteOffset = 0;
+    op.byteSize = _head;
+    blitCmds->CopyBufferCpuToGpu(op);
+    blitCmds->InsertMemoryBarrier(HgiMemoryBarrierAll);
 
     for (auto const &copyOp : _gpuCopyOps) {
         blitCmds->CopyBufferGpuToGpu(copyOp);
@@ -178,10 +168,7 @@ HdStStagingBuffer::Flush()
     _gpuCopyOps.clear();
     _head = 0;
 
-    if (_tripleBuffered) {
-        _activeSlot++;
-        _activeSlot = (_activeSlot < MULTIBUFFERING) ? _activeSlot : 0;
-    }
+    return true;
 }
 
 PXR_NAMESPACE_CLOSE_SCOPE
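
For reference, here is a minimal usage sketch of the simplified interface. It is illustrative only: the stagingBuffer, srcData, dstBuffer, and numBytes names are hypothetical placeholders, while the HgiBufferCpuToGpuOp fields and the StageCopy/Flush signatures are the ones appearing in the diff above.

    // Hypothetical caller; all variable names below are placeholders.
    HgiBufferCpuToGpuOp op;
    op.cpuSourceBuffer = srcData;           // CPU pointer to the source bytes
    op.sourceByteOffset = 0;
    op.gpuDestinationBuffer = dstBuffer;    // HgiBufferHandle of the destination
    op.destinationByteOffset = 0;
    op.byteSize = numBytes;

    // Small copies are memcpy'd into the single staging buffer and deferred;
    // large copies, or any copy on a unified-memory device, are submitted
    // directly to the global blit cmds.
    stagingBuffer->StageCopy(op);

    // Flush() returns whether a staging blit was actually recorded; it
    // returns false when nothing was staged (e.g. the unified-memory path).
    if (stagingBuffer->Flush()) {
        // Staged data was copied to the GPU and the queued GPU-to-GPU
        // copies were submitted.
    }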